diff --git a/Figures/figure1.pdf b/Figures/figure1.pdf index b93a7b4..db7e482 100644 Binary files a/Figures/figure1.pdf and b/Figures/figure1.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_0.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_0.pdf index cd8cfa5..d18bceb 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_0.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_0.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_1.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_1.pdf index b3579dd..10d68c6 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_1.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_1.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_2.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_2.pdf index e0f2be5..d6eb512 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_2.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_2.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_3.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_3.pdf index 8ff3172..43b2f5a 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_3.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_3.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_4.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_4.pdf index 3a608bd..105c062 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_4.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_4.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_5.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_5.pdf index cc4920d..4f3c380 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_5.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_5.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_6.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_6.pdf index 61577df..b0e5911 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_6.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_6.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_7.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_7.pdf index ecb7694..db605ee 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_7.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_7.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_8.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_8.pdf index 7e230d0..a7e99c0 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_8.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_8.pdf differ diff --git a/benchmark_duplicates/benchmarks_with_resampled_base_test_9.pdf b/benchmark_duplicates/benchmarks_with_resampled_base_test_9.pdf index 189c18c..0f9b2d1 100644 Binary files a/benchmark_duplicates/benchmarks_with_resampled_base_test_9.pdf and b/benchmark_duplicates/benchmarks_with_resampled_base_test_9.pdf differ diff --git a/benchmark_duplicates/cgm_cluster_maf_0.pkl b/benchmark_duplicates/cgm_cluster_maf_0.pkl index ca1a701..c20892b 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_0.pkl and b/benchmark_duplicates/cgm_cluster_maf_0.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_1.pkl b/benchmark_duplicates/cgm_cluster_maf_1.pkl index d1fe012..68a697f 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_1.pkl and b/benchmark_duplicates/cgm_cluster_maf_1.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_2.pkl b/benchmark_duplicates/cgm_cluster_maf_2.pkl index e181a1c..67ded7a 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_2.pkl and b/benchmark_duplicates/cgm_cluster_maf_2.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_3.pkl b/benchmark_duplicates/cgm_cluster_maf_3.pkl index 9288f07..134b218 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_3.pkl and b/benchmark_duplicates/cgm_cluster_maf_3.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_4.pkl b/benchmark_duplicates/cgm_cluster_maf_4.pkl index 470d41b..4747f25 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_4.pkl and b/benchmark_duplicates/cgm_cluster_maf_4.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_5.pkl b/benchmark_duplicates/cgm_cluster_maf_5.pkl index 1967437..226ed71 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_5.pkl and b/benchmark_duplicates/cgm_cluster_maf_5.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_6.pkl b/benchmark_duplicates/cgm_cluster_maf_6.pkl index 92345e1..bf420de 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_6.pkl and b/benchmark_duplicates/cgm_cluster_maf_6.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_7.pkl b/benchmark_duplicates/cgm_cluster_maf_7.pkl index a2d5cb1..b0dea32 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_7.pkl and b/benchmark_duplicates/cgm_cluster_maf_7.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_8.pkl b/benchmark_duplicates/cgm_cluster_maf_8.pkl index 12a3479..ec8f3f6 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_8.pkl and b/benchmark_duplicates/cgm_cluster_maf_8.pkl differ diff --git a/benchmark_duplicates/cgm_cluster_maf_9.pkl b/benchmark_duplicates/cgm_cluster_maf_9.pkl index a3c29d8..7f90922 100644 Binary files a/benchmark_duplicates/cgm_cluster_maf_9.pkl and b/benchmark_duplicates/cgm_cluster_maf_9.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_0.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_0.pkl index e85a345..ffcc76d 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_0.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_0.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_1.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_1.pkl index 630773a..0c48dac 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_1.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_1.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_2.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_2.pkl index 7f6fd55..64d2e83 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_2.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_2.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_3.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_3.pkl index e105067..23ebf21 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_3.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_3.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_4.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_4.pkl index 036a72b..1cac76f 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_4.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_4.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_5.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_5.pkl index 1bb4b06..aa34ca5 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_5.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_5.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_6.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_6.pkl index 037db17..128086b 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_6.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_6.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_7.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_7.pkl index e8f2b4d..58b6a08 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_7.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_7.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_8.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_8.pkl index c56c1a7..965e8d0 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_8.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_8.pkl differ diff --git a/benchmark_duplicates/cgm_realnvp_resampled_base_9.pkl b/benchmark_duplicates/cgm_realnvp_resampled_base_9.pkl index 7c01068..69eb479 100644 Binary files a/benchmark_duplicates/cgm_realnvp_resampled_base_9.pkl and b/benchmark_duplicates/cgm_realnvp_resampled_base_9.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_0.pkl b/benchmark_duplicates/cgm_single_maf_0.pkl index 67031d8..1ba3fb9 100644 Binary files a/benchmark_duplicates/cgm_single_maf_0.pkl and b/benchmark_duplicates/cgm_single_maf_0.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_1.pkl b/benchmark_duplicates/cgm_single_maf_1.pkl index d31a16b..3c4df20 100644 Binary files a/benchmark_duplicates/cgm_single_maf_1.pkl and b/benchmark_duplicates/cgm_single_maf_1.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_2.pkl b/benchmark_duplicates/cgm_single_maf_2.pkl index 196b7be..163055d 100644 Binary files a/benchmark_duplicates/cgm_single_maf_2.pkl and b/benchmark_duplicates/cgm_single_maf_2.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_3.pkl b/benchmark_duplicates/cgm_single_maf_3.pkl index 7412639..e242c5a 100644 Binary files a/benchmark_duplicates/cgm_single_maf_3.pkl and b/benchmark_duplicates/cgm_single_maf_3.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_4.pkl b/benchmark_duplicates/cgm_single_maf_4.pkl index 0d1ac89..5b6190b 100644 Binary files a/benchmark_duplicates/cgm_single_maf_4.pkl and b/benchmark_duplicates/cgm_single_maf_4.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_5.pkl b/benchmark_duplicates/cgm_single_maf_5.pkl index d66887d..60b3ffb 100644 Binary files a/benchmark_duplicates/cgm_single_maf_5.pkl and b/benchmark_duplicates/cgm_single_maf_5.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_6.pkl b/benchmark_duplicates/cgm_single_maf_6.pkl index 38ab1cc..98e8b51 100644 Binary files a/benchmark_duplicates/cgm_single_maf_6.pkl and b/benchmark_duplicates/cgm_single_maf_6.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_7.pkl b/benchmark_duplicates/cgm_single_maf_7.pkl index 78afd28..441f0c3 100644 Binary files a/benchmark_duplicates/cgm_single_maf_7.pkl and b/benchmark_duplicates/cgm_single_maf_7.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_8.pkl b/benchmark_duplicates/cgm_single_maf_8.pkl index 83f9af7..cff812e 100644 Binary files a/benchmark_duplicates/cgm_single_maf_8.pkl and b/benchmark_duplicates/cgm_single_maf_8.pkl differ diff --git a/benchmark_duplicates/cgm_single_maf_9.pkl b/benchmark_duplicates/cgm_single_maf_9.pkl index 7a89261..9b761df 100644 Binary files a/benchmark_duplicates/cgm_single_maf_9.pkl and b/benchmark_duplicates/cgm_single_maf_9.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_0.pkl b/benchmark_duplicates/rm_cluster_maf_0.pkl index 71cf2b6..b3ee85f 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_0.pkl and b/benchmark_duplicates/rm_cluster_maf_0.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_1.pkl b/benchmark_duplicates/rm_cluster_maf_1.pkl index 84d932f..4d640e1 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_1.pkl and b/benchmark_duplicates/rm_cluster_maf_1.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_2.pkl b/benchmark_duplicates/rm_cluster_maf_2.pkl index 0e599cc..1aa22f2 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_2.pkl and b/benchmark_duplicates/rm_cluster_maf_2.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_3.pkl b/benchmark_duplicates/rm_cluster_maf_3.pkl index cb340c9..4987945 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_3.pkl and b/benchmark_duplicates/rm_cluster_maf_3.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_4.pkl b/benchmark_duplicates/rm_cluster_maf_4.pkl index ec7ad4c..4df793a 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_4.pkl and b/benchmark_duplicates/rm_cluster_maf_4.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_5.pkl b/benchmark_duplicates/rm_cluster_maf_5.pkl index 9235724..ff462bb 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_5.pkl and b/benchmark_duplicates/rm_cluster_maf_5.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_6.pkl b/benchmark_duplicates/rm_cluster_maf_6.pkl index 7a13df0..8a8a101 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_6.pkl and b/benchmark_duplicates/rm_cluster_maf_6.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_7.pkl b/benchmark_duplicates/rm_cluster_maf_7.pkl index 21a64fe..da72716 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_7.pkl and b/benchmark_duplicates/rm_cluster_maf_7.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_8.pkl b/benchmark_duplicates/rm_cluster_maf_8.pkl index 198229c..d981216 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_8.pkl and b/benchmark_duplicates/rm_cluster_maf_8.pkl differ diff --git a/benchmark_duplicates/rm_cluster_maf_9.pkl b/benchmark_duplicates/rm_cluster_maf_9.pkl index e959302..a20c298 100644 Binary files a/benchmark_duplicates/rm_cluster_maf_9.pkl and b/benchmark_duplicates/rm_cluster_maf_9.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_0.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_0.pkl index 183a4e0..38dd8f4 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_0.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_0.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_1.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_1.pkl index e38a9e2..f34d458 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_1.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_1.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_2.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_2.pkl index 9a80c8b..f12caaf 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_2.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_2.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_3.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_3.pkl index 29b7cb9..7ec97fc 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_3.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_3.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_4.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_4.pkl index 78388b2..1138645 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_4.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_4.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_5.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_5.pkl index 176d9f3..f4b10aa 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_5.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_5.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_6.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_6.pkl index 348021a..d8753bb 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_6.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_6.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_7.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_7.pkl index b21c6e5..6ddcb7e 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_7.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_7.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_8.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_8.pkl index 57eeac5..321140d 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_8.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_8.pkl differ diff --git a/benchmark_duplicates/rm_realnvp_resampled_base_9.pkl b/benchmark_duplicates/rm_realnvp_resampled_base_9.pkl index cede83c..d529940 100644 Binary files a/benchmark_duplicates/rm_realnvp_resampled_base_9.pkl and b/benchmark_duplicates/rm_realnvp_resampled_base_9.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_0.pkl b/benchmark_duplicates/rm_single_maf_0.pkl index f573312..8e866e3 100644 Binary files a/benchmark_duplicates/rm_single_maf_0.pkl and b/benchmark_duplicates/rm_single_maf_0.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_1.pkl b/benchmark_duplicates/rm_single_maf_1.pkl index 6d8313d..c1c2d46 100644 Binary files a/benchmark_duplicates/rm_single_maf_1.pkl and b/benchmark_duplicates/rm_single_maf_1.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_2.pkl b/benchmark_duplicates/rm_single_maf_2.pkl index c1e05ff..05f2145 100644 Binary files a/benchmark_duplicates/rm_single_maf_2.pkl and b/benchmark_duplicates/rm_single_maf_2.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_3.pkl b/benchmark_duplicates/rm_single_maf_3.pkl index 1f2001b..c7ced49 100644 Binary files a/benchmark_duplicates/rm_single_maf_3.pkl and b/benchmark_duplicates/rm_single_maf_3.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_4.pkl b/benchmark_duplicates/rm_single_maf_4.pkl index a53b5c8..ce13fae 100644 Binary files a/benchmark_duplicates/rm_single_maf_4.pkl and b/benchmark_duplicates/rm_single_maf_4.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_5.pkl b/benchmark_duplicates/rm_single_maf_5.pkl index b0a6e5a..86837ad 100644 Binary files a/benchmark_duplicates/rm_single_maf_5.pkl and b/benchmark_duplicates/rm_single_maf_5.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_6.pkl b/benchmark_duplicates/rm_single_maf_6.pkl index f7a03b1..52f36d5 100644 Binary files a/benchmark_duplicates/rm_single_maf_6.pkl and b/benchmark_duplicates/rm_single_maf_6.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_7.pkl b/benchmark_duplicates/rm_single_maf_7.pkl index 5c37492..8f0d71b 100644 Binary files a/benchmark_duplicates/rm_single_maf_7.pkl and b/benchmark_duplicates/rm_single_maf_7.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_8.pkl b/benchmark_duplicates/rm_single_maf_8.pkl index 59f094f..b1a4d7d 100644 Binary files a/benchmark_duplicates/rm_single_maf_8.pkl and b/benchmark_duplicates/rm_single_maf_8.pkl differ diff --git a/benchmark_duplicates/rm_single_maf_9.pkl b/benchmark_duplicates/rm_single_maf_9.pkl index 3a06b26..ca39926 100644 Binary files a/benchmark_duplicates/rm_single_maf_9.pkl and b/benchmark_duplicates/rm_single_maf_9.pkl differ diff --git a/benchmark_duplicates/slurm-29410985.out b/benchmark_duplicates/slurm-29410985.out new file mode 100644 index 0000000..96e7e7a --- /dev/null +++ b/benchmark_duplicates/slurm-29410985.out @@ -0,0 +1,781 @@ +Loading rhel8/default-icl + Loading requirement: dot rhel8/slurm singularity/current rhel8/global + cuda/11.4 vgl/2.5.1/64 intel-oneapi-compilers/2022.1.0/gcc/b6zld2mz + intel-oneapi-mpi/2021.6.0/intel/guxuvcpm +Changed directory to /rds/user/htjb2/hpc-work/piecewise-normalizing-flows-reviews. + +JobID: 29410985 +====== +Time: Wed 11 Oct 10:07:38 BST 2023 +Running on master node: cpu-q-15 +Current directory: /rds/user/htjb2/hpc-work/piecewise-normalizing-flows-reviews + +Nodes allocated: +================ +cpu-q-15 + +numtasks=76, numnodes=1, mpi_tasks_per_node=76 (OMP_NUM_THREADS=1) + +Executing command: +================== +python benchmarks.py + +2023-10-11 10:07:44.716823: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2023-10-11 10:07:44.931949: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2023-10-11 10:07:47.226281: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +2023-10-11 10:07:57.597712: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected +2023-10-11 10:07:57.842901: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0xc6b9c10 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2023-10-11 10:07:57.843058: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2023-10-11 10:07:58.049506: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process. +Warning: Dependencies for Boltzmann generators could not be loaded. Other models can still be used. +Warning: Dependencies for Boltzmann generators could not be loaded. Other models can still be used. + 0%| | 0/20000 [00:00 triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +Early stopped. Epochs used = 4229. Minimum at epoch = 3829 + 0%| | 0/20000 [00:00 triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +Early stopped. Epochs used = 5577. Minimum at epoch = 5177 + 0%| | 0/20000 [00:00 triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +WARNING:tensorflow:6 out of the last 6 calls to triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +Early stopped. Epochs used = 3113. Minimum at epoch = 2713 + 0%| | 0/20000 [00:00 triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +/rds/user/htjb2/hpc-work/piecewise-normalizing-flows-reviews/myenv/lib/python3.8/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning + super()._check_params_vs_input(X, default_n_init=10) +Early stopped. Epochs used = 849. Minimum at epoch = 449 +number clusters: 2 number_networks: 4 + 0%| | 0/20000 [00:00 triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +Early stopped. Epochs used = 2839. Minimum at epoch = 2439 + 0%| | 0/20000 [00:00 triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +/rds/user/htjb2/hpc-work/piecewise-normalizing-flows-reviews/myenv/lib/python3.8/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning + super()._check_params_vs_input(X, default_n_init=10) +Early stopped. Epochs used = 4582. Minimum at epoch = 4182 +number clusters: 8 number_networks: 1 + 0%| | 0/20000 [00:00 triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +Early stopped. Epochs used = 2895. Minimum at epoch = 2495 + 0%| | 0/20000 [00:00 triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +WARNING:tensorflow:6 out of the last 6 calls to triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +WARNING:tensorflow:5 out of the last 6 calls to triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +WARNING:tensorflow:6 out of the last 7 calls to triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +/rds/user/htjb2/hpc-work/piecewise-normalizing-flows-reviews/myenv/lib/python3.8/site-packages/margarine/clustered.py:174: UserWarning: The number of clusters is 20. This is the maximum number of clusters that can be used. If you require more clusters, please specify the 'cluster_number' kwarg. margarine will continue with 20 clusters. + warnings.warn("The number of clusters is 20. " + +WARNING:tensorflow:5 out of the last 5 calls to triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +WARNING:tensorflow:5 out of the last 5 calls to triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +WARNING:tensorflow:6 out of the last 6 calls to triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +WARNING:tensorflow:6 out of the last 6 calls to triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. +/rds/user/htjb2/hpc-work/piecewise-normalizing-flows-reviews/myenv/lib/python3.8/site-packages/margarine/clustered.py:174: UserWarning: The number of clusters is 20. This is the maximum number of clusters that can be used. If you require more clusters, please specify the 'cluster_number' kwarg. margarine will continue with 20 clusters. + warnings.warn("The number of clusters is 20. " + +maf 0.04031182 0.028188227102607325 +kmeans 0.036751173 0.014605213892256943 +minbatchkmeans 0.036751173 0.014605213892256943 +mean_shift 0.039975084 0.016644943875773675 +spectral_clustering 0.80715007 0.07794943311681986 +agglomerative_clustering 0.4231082 0.06230989730486836 +birch 0.035159063 0.014266189726573182 diff --git a/datasets/gas.py b/datasets/gas.py new file mode 100644 index 0000000..384000f --- /dev/null +++ b/datasets/gas.py @@ -0,0 +1,73 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt + +import datasets + + +class GAS: + + class Data: + + def __init__(self, data): + + self.x = data.astype(np.float32) + self.N = self.x.shape[0] + + def __init__(self): + + file = 'raw_physical_data/gas/ethylene_CO.pickle' + trn, val, tst = load_data_and_clean_and_split(file) + + self.trn = self.Data(trn) + self.val = self.Data(val) + self.tst = self.Data(tst) + + self.n_dims = self.trn.x.shape[1] + + +def load_data(file): + + data = pd.read_pickle(file) + # data = pd.read_pickle(file).sample(frac=0.25) + # data.to_pickle(file) + data.drop("Meth", axis=1, inplace=True) + data.drop("Eth", axis=1, inplace=True) + data.drop("Time", axis=1, inplace=True) + return data + + +def get_correlation_numbers(data): + C = data.corr() + A = C > 0.98 + B = A.values.sum(axis=1) + return B + + +def load_data_and_clean(file): + + data = load_data(file) + B = get_correlation_numbers(data) + + while np.any(B > 1): + col_to_remove = np.where(B > 1)[0][0] + col_name = data.columns[col_to_remove] + data.drop(col_name, axis=1, inplace=True) + B = get_correlation_numbers(data) + # print(data.corr()) + data = (data-data.mean())/data.std() + + return data + + +def load_data_and_clean_and_split(file): + + data = load_data_and_clean(file).values + N_test = int(0.1*data.shape[0]) + data_test = data[-N_test:] + data_train = data[0:-N_test] + N_validate = int(0.1*data_train.shape[0]) + data_validate = data_train[-N_validate:] + data_train = data_train[0:-N_validate] + + return data_train, data_validate, data_test diff --git a/figure1.py b/figure1.py index 30ba127..aa0888f 100644 --- a/figure1.py +++ b/figure1.py @@ -2,6 +2,7 @@ from anesthetic import MCMCSamples import matplotlib.pyplot as plt from margarine.maf import MAF +from margarine.clustered import clusterMAF # generate a set of multi-modal samples nsamples = 5000 @@ -17,32 +18,32 @@ fig, axes = plt.subplots(1, 3, figsize=(6.3, 3)) # plot original samples -axes[0].hist2d(data[:, 0], data[:, 1], bins=80) +axes[0].hist2d(data[:, 0], data[:, 1], bins=80, cmap='Blues') # try and load the example maf only flow else generate and plot samples try: bij = MAF.load("figure1_normal_maf.pkl") except: bij = MAF(samples[names].values, - samples.get_weights().astype('float64')) + weights=samples.get_weights().astype('float64')) bij.train(10000, early_stop=True) bij.save("figure1_normal_maf.pkl") bij_samples = bij.sample(nsamples) -axes[1].hist2d(bij_samples[:, 0], bij_samples[:, 1], bins=80) +axes[1].hist2d(bij_samples[:, 0], bij_samples[:, 1], bins=80, cmap='Blues') # try and load the example cluster maf only flow else generate and plot samples try: - bij = MAF.load("figure1_cluster.pkl") + bij = clusterMAF.load("figure1_cluster.pkl") except: - bij = MAF(samples[names].values, - samples.get_weights().astype('float64'), clustering=True) + bij = clusterMAF(samples[names].values, + weights=samples.get_weights().astype('float64'), clustering=True) bij.train(10000, early_stop=True) bij.save("figure1_cluster.pkl") bij_samples = bij.sample(nsamples) -axes[2].hist2d(bij_samples[:, 0], bij_samples[:, 1], bins=80) +axes[2].hist2d(bij_samples[:, 0], bij_samples[:, 1], bins=80, cmap='Blues') # formatting -title = ['Target.', 'MAF\nGaussian Base\ne.g. Papamakarios et al. 2017', +title = ['Target', 'MAF\nGaussian Base\ne.g. Papamakarios et al. 2017', 'Piecewise MAF\nGaussian Base\nThis work'] for i in range(len(axes)): axes[i].set_xticks([]) diff --git a/figure1_cluster.pkl b/figure1_cluster.pkl index 106cae6..de2e82d 100644 Binary files a/figure1_cluster.pkl and b/figure1_cluster.pkl differ diff --git a/figure1_normal_maf.pkl b/figure1_normal_maf.pkl index 47830b6..dcc015a 100644 Binary files a/figure1_normal_maf.pkl and b/figure1_normal_maf.pkl differ diff --git a/figure2.pdf b/figure2.pdf new file mode 100644 index 0000000..b614f5d Binary files /dev/null and b/figure2.pdf differ diff --git a/figure2.py b/figure2.py index fcff7fe..cf7efd5 100644 --- a/figure2.py +++ b/figure2.py @@ -3,9 +3,23 @@ from target_dists_stimper import CircularGaussianMixture from sklearn.metrics import silhouette_score from sklearn.cluster import KMeans +from matplotlib import rc +import matplotlib as mpl + +# figure formatting +mpl.rcParams['axes.prop_cycle'] = mpl.cycler('color', + ['ff7f00', '984ea3', '999999', '377eb8', + '4daf4a','f781bf', 'a65628', 'e41a1c', 'dede00']) +mpl.rcParams['text.usetex'] = True +#mpl.rcParams['text.latex.preamble'] = [ +# r'\usepackage{amsmath}', +# r'\usepackage{amssymb}'] +rc('font', family='serif') +rc('font', serif='cm') +rc('savefig', pad_inches=0.05) # generate example samples -nsample = 10000 +nsample = 1000 cgm = CircularGaussianMixture() s = cgm.sample(nsample).numpy() @@ -24,7 +38,16 @@ print(cluster_number) # plot a few examples of the clustering and the corresponding score. -fig, axes = plt.subplots(2, 3, figsize=(6.3, 4)) +fig = plt.figure(constrained_layout=True, figsize=(6.3, 4)) +gs = fig.add_gridspec(2, 5, width_ratios=[1, 1, 1, 0.8, 0.8]) + +axes = [] +for i in range(2): + ax = [] + for j in range(3): + ax.append(fig.add_subplot(gs[i, j])) + axes.append(ax) +axes = np.array(axes) cmap = 'inferno' @@ -36,18 +59,24 @@ axes[1, 2].scatter(s[:, 0], s[:, 1], c=labels[10], s=1, cmap=cmap) -axes[0, 0].set_title(r'$k=2, s=$' + '{:.3f}'.format(-losses[0])) -axes[0, 1].set_title(r'$k=4, s=$' + '{:.3f}'.format(-losses[2])) -axes[0, 2].set_title(r'$k=6, s=$' + '{:.3f}'.format(-losses[4])) -axes[1, 0].set_title(r'$k=8, s=$' + '{:.3f}'.format(-losses[6])) -axes[1, 1].set_title(r'$k=10, s=$' + '{:.3f}'.format(-losses[8])) -axes[1, 2].set_title(r'$k=12, s=$' + '{:.3f}'.format(-losses[10])) +axes[0, 0].set_title(r'$k=2$,' +'\n' + r'$s=$' + '{:.3f}'.format(-losses[0]), fontsize=10) +axes[0, 1].set_title(r'$k=4$,' +'\n' + r'$s=$' + '{:.3f}'.format(-losses[2]), fontsize=10) +axes[0, 2].set_title(r'$k=6$,' +'\n' + r'$s=$' + '{:.3f}'.format(-losses[4]), fontsize=10) +axes[1, 0].set_title(r'$k=8$,' +'\n' + r'$s=$' + '{:.3f}'.format(-losses[6]), fontsize=10) +axes[1, 1].set_title(r'$k=10$,' +'\n' + r'$s=$' + '{:.3f}'.format(-losses[8]), fontsize=10) +axes[1, 2].set_title(r'$k=12$,' +'\n' + r'$s=$' + '{:.3f}'.format(-losses[10]), fontsize=10) for i in range(2): for j in range(3): axes[i, j].set_xticks([]) axes[i, j].set_yticks([]) -plt.tight_layout() +ax = fig.add_subplot(gs[:, 3:]) +ax.plot(ks, -losses, 'o-', c='k') +ax.set_xlabel('Number of clusters') +ax.set_ylabel('Silhouette score') +ax.set_xticks(ks[::2]) + +#plt.tight_layout() plt.savefig('figure2.pdf', dpi=300) plt.show() \ No newline at end of file diff --git a/get_benchmark_epochs.py b/get_benchmark_epochs.py new file mode 100644 index 0000000..11adb8c --- /dev/null +++ b/get_benchmark_epochs.py @@ -0,0 +1,228 @@ +import numpy as np +import pickle +from margarine.maf import MAF +from margarine.clustered import clusterMAF +from target_dists_stimper import TwoMoons, CircularGaussianMixture, RingMixture +from sklearn.cluster import KMeans +import time + +nsample= 10000 +pEpochs = 10000 +epochs = 10000 + +cgm = CircularGaussianMixture() +tm = TwoMoons() +rm = RingMixture() + +base_dir = 'timing/' + +try: + cgm_maf_epochs = list(np.loadtxt(base_dir + 'cgm_maf_epochs.txt')) + cgm_clustermaf_epochs = list(np.loadtxt(base_dir + 'cgm_clustermaf_epochs.txt')) + tm_maf_epochs = list(np.loadtxt(base_dir + 'tm_maf_epochs.txt')) + tm_clustermaf_epochs = list(np.loadtxt(base_dir + 'tm_clustermaf_epochs.txt')) + rm_maf_epochs = list(np.loadtxt(base_dir + 'rm_maf_epochs.txt')) + rm_clustermaf_epochs = list(np.loadtxt(base_dir + 'rm_clustermaf_epochs.txt')) + + cgm_maf_time = list(np.loadtxt(base_dir + 'cgm_maf_time.txt')) + cgm_clustermaf_time = list(np.loadtxt(base_dir + 'cgm_clustermaf_time.txt')) + tm_maf_time = list(np.loadtxt(base_dir + 'tm_maf_time.txt')) + tm_clustermaf_time = list(np.loadtxt(base_dir + 'tm_clustermaf_time.txt')) + rm_maf_time = list(np.loadtxt(base_dir + 'rm_maf_time.txt')) + rm_clustermaf_time = list(np.loadtxt(base_dir + 'rm_clustermaf_time.txt')) + + cgm_maf_cost = list(np.loadtxt(base_dir + 'cgm_maf_cost.txt')) + cgm_clustermaf_cost = list(np.loadtxt(base_dir + 'cgm_clustermaf_cost.txt')) + tm_maf_cost = list(np.loadtxt(base_dir + 'tm_maf_cost.txt')) + tm_clustermaf_cost = list(np.loadtxt(base_dir + 'tm_clustermaf_cost.txt')) + rm_maf_cost = list(np.loadtxt(base_dir + 'rm_maf_cost.txt')) + rm_clustermaf_cost = list(np.loadtxt(base_dir + 'rm_clustermaf_cost.txt')) +except: + cgm_maf_epochs, tm_maf_epochs, rm_maf_epochs = [], [], [] + cgm_clustermaf_epochs, tm_clustermaf_epochs, rm_clustermaf_epochs = [], [], [] + cgm_maf_time, tm_maf_time, rm_maf_time = [], [], [] + cgm_clustermaf_time, tm_clustermaf_time, rm_clustermaf_time = [], [], [] + #cgm_cluster_number, tm_cluster_number, rm_cluster_number = [], [], [] + cgm_maf_cost, tm_maf_cost, rm_maf_cost = [], [], [] + cgm_clustermaf_cost, tm_clustermaf_cost, rm_clustermaf_cost = [], [], [] + +for d in range(len(cgm_maf_cost), 3): + + s = cgm.sample(nsample).numpy() + + # noraml maf for circle of gaussian + sAFlow = MAF(s) + start = time.time() + sAFlow.train(epochs, early_stop=True) + e = time.time() + cgm_maf_time.append(e-start) + cgm_maf_epochs.append(len(sAFlow.loss_history)) + hyps = 0 + for j, made in enumerate(sAFlow.mades): + hyps += made.count_params() + cgm_maf_cost.append(len(sAFlow.loss_history)*len(sAFlow.theta)*hyps) + + _ = clusterMAF(s) + #cgm_cluster_number.append(_.cluster_number) + nn = int((17424/_.cluster_number/2904)//1 + 1) + print('number clusters: ', _.cluster_number, ' number_networks: ', nn) + + kmeans = KMeans(_.cluster_number, random_state=0) + labels = kmeans.fit(s).predict(s) + sAFlow = clusterMAF(s, cluster_labels=labels, + cluster_number=_.cluster_number, number_networks=nn) + start = time.time() + sAFlow.train(pEpochs, early_stop=True) + e = time.time() + cgm_clustermaf_time.append(e-start) + cgm_clustermaf_epochs.append(np.sum([len(sAFlow.flow[i].loss_history) for i in range(len(sAFlow.flow))])) + cost = [] + for i in range(len(sAFlow.flow)): + hyps = 0 + for j, made in enumerate(sAFlow.flow[i].mades): + hyps += made.count_params() + cost.append(len(sAFlow.flow[i].loss_history)*len(sAFlow.flow[i].theta)*hyps) + cgm_clustermaf_cost.append(np.sum(cost)) + + s = tm.sample(nsample).numpy() + + # noraml maf for circle of gaussian + sAFlow = MAF(s) + start = time.time() + sAFlow.train(epochs, early_stop=True) + e = time.time() + tm_maf_time.append(e-start) + tm_maf_epochs.append(len(sAFlow.loss_history)) + hyps = 0 + for j, made in enumerate(sAFlow.mades): + hyps += made.count_params() + tm_maf_cost.append(len(sAFlow.loss_history)*len(sAFlow.theta)*hyps) + + _ = clusterMAF(s) + #tm_cluster_number.append(_.cluster_number) + nn = int((17424/_.cluster_number/2904)//1 + 1) + print('number clusters: ', _.cluster_number, ' number_networks: ', nn) + + kmeans = KMeans(_.cluster_number, random_state=0) + labels = kmeans.fit(s).predict(s) + sAFlow = clusterMAF(s, cluster_labels=labels, + cluster_number=_.cluster_number, number_networks=nn) + start = time.time() + sAFlow.train(pEpochs, early_stop=True) + e = time.time() + tm_clustermaf_time.append(e-start) + tm_clustermaf_epochs.append(np.sum([len(sAFlow.flow[i].loss_history) for i in range(len(sAFlow.flow))])) + cost = [] + for i in range(len(sAFlow.flow)): + hyps = 0 + for j, made in enumerate(sAFlow.flow[i].mades): + hyps += made.count_params() + cost.append(len(sAFlow.flow[i].loss_history)*len(sAFlow.flow[i].theta)*hyps) + tm_clustermaf_cost.append(np.sum(cost)) + + s = rm.sample(nsample).numpy() + + # noraml maf for circle of gaussian + sAFlow = MAF(s) + start = time.time() + sAFlow.train(epochs, early_stop=True) + e = time.time() + rm_maf_time.append(e-start) + rm_maf_epochs.append(len(sAFlow.loss_history)) + hyps = 0 + for j, made in enumerate(sAFlow.mades): + hyps += made.count_params() + rm_maf_cost.append(len(sAFlow.loss_history)*len(sAFlow.theta)*hyps) + + _ = clusterMAF(s) + #rm_cluster_number.append(_.cluster_number) + nn = int((17424/_.cluster_number/2904)//1 + 1) + print('number clusters: ', _.cluster_number, ' number_networks: ', nn) + + kmeans = KMeans(_.cluster_number, random_state=0) + labels = kmeans.fit(s).predict(s) + sAFlow = clusterMAF(s, cluster_labels=labels, + cluster_number=_.cluster_number, number_networks=nn) + start = time.time() + sAFlow.train(pEpochs, early_stop=True) + e = time.time() + rm_clustermaf_time.append(e-start) + rm_clustermaf_epochs.append(np.sum([len(sAFlow.flow[i].loss_history) for i in range(len(sAFlow.flow))])) + cost = [] + for i in range(len(sAFlow.flow)): + hyps = 0 + for j, made in enumerate(sAFlow.flow[i].mades): + hyps += made.count_params() + cost.append(len(sAFlow.flow[i].loss_history)*len(sAFlow.flow[i].theta)*hyps) + rm_clustermaf_cost.append(np.sum(cost)) + + np.savetxt(base_dir + 'cgm_maf_epochs.txt', cgm_maf_epochs) + np.savetxt(base_dir + 'cgm_clustermaf_epochs.txt', cgm_clustermaf_epochs) + np.savetxt(base_dir + 'tm_maf_epochs.txt', tm_maf_epochs) + np.savetxt(base_dir + 'tm_clustermaf_epochs.txt', tm_clustermaf_epochs) + np.savetxt(base_dir + 'rm_maf_epochs.txt', rm_maf_epochs) + np.savetxt(base_dir + 'rm_clustermaf_epochs.txt', rm_clustermaf_epochs) + + np.savetxt(base_dir + 'cgm_maf_time.txt', cgm_maf_time) + np.savetxt(base_dir + 'cgm_clustermaf_time.txt', cgm_clustermaf_time) + np.savetxt(base_dir + 'tm_maf_time.txt', tm_maf_time) + np.savetxt(base_dir + 'tm_clustermaf_time.txt', tm_clustermaf_time) + np.savetxt(base_dir + 'rm_maf_time.txt', rm_maf_time) + np.savetxt(base_dir + 'rm_clustermaf_time.txt', rm_clustermaf_time) + + np.savetxt(base_dir + 'cgm_maf_cost.txt', cgm_maf_cost) + np.savetxt(base_dir + 'cgm_clustermaf_cost.txt', cgm_clustermaf_cost) + np.savetxt(base_dir + 'tm_maf_cost.txt', tm_maf_cost) + np.savetxt(base_dir + 'tm_clustermaf_cost.txt', tm_clustermaf_cost) + np.savetxt(base_dir + 'rm_maf_cost.txt', rm_maf_cost) + np.savetxt(base_dir + 'rm_clustermaf_cost.txt', rm_clustermaf_cost) + + + + +print('cgm maf epochs: ', np.mean(cgm_maf_epochs), ' +/- ', np.std(cgm_maf_epochs)/np.sqrt(5)) +print('cgm clustermaf epochs: ', np.mean(cgm_clustermaf_epochs), ' +/- ', np.std(cgm_clustermaf_epochs)/np.sqrt(5)) + +print('tm maf epochs: ', np.mean(tm_maf_epochs), ' +/- ', np.std(tm_maf_epochs)/np.sqrt(5)) +print('tm clustermaf epochs: ', np.mean(tm_clustermaf_epochs), ' +/- ', np.std(tm_clustermaf_epochs)/np.sqrt(5)) + +print('rm maf epochs: ', np.mean(rm_maf_epochs), ' +/- ', np.std(rm_maf_epochs)/np.sqrt(5)) +print('rm clustermaf epochs: ', np.mean(rm_clustermaf_epochs), ' +/- ', np.std(rm_clustermaf_epochs)/np.sqrt(5)) + +print('cgm maf time: ', np.mean(cgm_maf_time), ' +/- ', np.std(cgm_maf_time)/np.sqrt(5)) +print('cgm clustermaf time: ', np.mean(cgm_clustermaf_time), ' +/- ', np.std(cgm_clustermaf_time)/np.sqrt(5)) + +print('tm maf time: ', np.mean(tm_maf_time), ' +/- ', np.std(tm_maf_time)/np.sqrt(5)) +print('tm clustermaf time: ', np.mean(tm_clustermaf_time), ' +/- ', np.std(tm_clustermaf_time)/np.sqrt(5)) + +print('rm maf time: ', np.mean(rm_maf_time), ' +/- ', np.std(rm_maf_time)/np.sqrt(5)) +print('rm clustermaf time: ', np.mean(rm_clustermaf_time), ' +/- ', np.std(rm_clustermaf_time)/np.sqrt(5)) + +print('cgm maf cost: ', np.mean(cgm_maf_cost), ' +/- ', np.std(cgm_maf_cost)/np.sqrt(5)) +print('cgm clustermaf cost: ', np.mean(cgm_clustermaf_cost), ' +/- ', np.std(cgm_clustermaf_cost)/np.sqrt(5)) + +print('tm maf cost: ', np.mean(tm_maf_cost), ' +/- ', np.std(tm_maf_cost)/np.sqrt(5)) +print('tm clustermaf cost: ', np.mean(tm_clustermaf_cost), ' +/- ', np.std(tm_clustermaf_cost)/np.sqrt(5)) + +print('rm maf cost: ', np.mean(rm_maf_cost), ' +/- ', np.std(rm_maf_cost)/np.sqrt(5)) +print('rm clustermaf cost: ', np.mean(rm_clustermaf_cost), ' +/- ', np.std(rm_clustermaf_cost)/np.sqrt(5)) + +print('cgm clustermaf cost: ', np.mean(cgm_clustermaf_cost/cgm_maf_cost), ' +/- ', + np.std(cgm_clustermaf_cost/cgm_maf_cost)/np.sqrt(5)) +print('tm clustermaf cost: ', np.mean(tm_clustermaf_cost/tm_maf_cost), ' +/- ', + np.std(tm_clustermaf_cost/tm_maf_cost)/np.sqrt(5)) +print('rm clustermaf cost: ', np.mean(rm_clustermaf_cost/rm_maf_cost), ' +/- ', + np.std(rm_clustermaf_cost/rm_maf_cost)/np.sqrt(5)) + +"""cgm_cluster_number = np.array(cgm_cluster_number) +tm_cluster_number = np.array(tm_cluster_number) +rm_cluster_number = np.array(rm_cluster_number) + +cgm_clsutermaf_time = np.array(cgm_clustermaf_time) +tm_clustermaf_time = np.array(tm_clustermaf_time) +rm_clustermaf_time = np.array(rm_clustermaf_time) + +print('cgm time per cluster: ', np.mean(cgm_clustermaf_time/cgm_cluster_number), ' +/- ', np.std(cgm_clustermaf_time/cgm_cluster_number)/np.sqrt(5)) +print('tm time per cluster: ', np.mean(tm_clustermaf_time/tm_cluster_number), ' +/- ', np.std(tm_clustermaf_time/tm_cluster_number)/np.sqrt(5)) +print('rm time per cluster: ', np.mean(rm_clustermaf_time/rm_cluster_number), ' +/- ', np.std(rm_clustermaf_time/rm_cluster_number)/np.sqrt(5)) +""" \ No newline at end of file diff --git a/physical_benchmarks/gas_maf.pkl b/physical_benchmarks/gas_maf.pkl new file mode 100644 index 0000000..913ae33 Binary files /dev/null and b/physical_benchmarks/gas_maf.pkl differ diff --git a/physical_benchmarks/hep__maf.pkl b/physical_benchmarks/hep__maf.pkl new file mode 100644 index 0000000..ea29359 Binary files /dev/null and b/physical_benchmarks/hep__maf.pkl differ diff --git a/physical_benchmarks/hep_clustermaf.pkl b/physical_benchmarks/hep_clustermaf.pkl new file mode 100644 index 0000000..74aa5eb Binary files /dev/null and b/physical_benchmarks/hep_clustermaf.pkl differ diff --git a/physical_benchmarks/miniboone_clustermaf.pkl b/physical_benchmarks/miniboone_clustermaf.pkl new file mode 100644 index 0000000..0a6db66 Binary files /dev/null and b/physical_benchmarks/miniboone_clustermaf.pkl differ diff --git a/physical_benchmarks/miniboone_maf.pkl b/physical_benchmarks/miniboone_maf.pkl new file mode 100644 index 0000000..6ec929d Binary files /dev/null and b/physical_benchmarks/miniboone_maf.pkl differ diff --git a/physical_benchmarks/slurm-hep.out b/physical_benchmarks/slurm-hep.out new file mode 100644 index 0000000..53e64a9 --- /dev/null +++ b/physical_benchmarks/slurm-hep.out @@ -0,0 +1,44 @@ +Loading rhel8/default-icl + Loading requirement: dot rhel8/slurm singularity/current rhel8/global + cuda/11.4 vgl/3.1/64 intel-oneapi-compilers/2022.1.0/gcc/b6zld2mz + intel-oneapi-mpi/2021.6.0/intel/guxuvcpm +Changed directory to /rds/user/htjb2/hpc-work/piecewise-normalizing-flows-reviews. + +JobID: 29794988 +====== +Time: Mon 16 Oct 12:58:53 BST 2023 +Running on master node: cpu-q-512 +Current directory: /rds/user/htjb2/hpc-work/piecewise-normalizing-flows-reviews + +Nodes allocated: +================ +cpu-q-512 + +numtasks=76, numnodes=1, mpi_tasks_per_node=76 (OMP_NUM_THREADS=1) + +Executing command: +================== +python read_hep.py + +2023-10-16 12:59:32.480533: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2023-10-16 12:59:34.779507: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2023-10-16 12:59:47.341719: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +2023-10-16 13:00:08.109875: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected +2023-10-16 13:00:09.246384: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2df2af0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2023-10-16 13:00:09.247080: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2023-10-16 13:00:10.525375: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process. +read_hep.py:10: ParserWarning: Length of header or names does not match length of data. This leads to a loss of data with index_col=False. + data_train = pd.read_csv(filepath_or_buffer=join(path, "1000_train.csv"), index_col=False) + 0%| | 0/10000 [00:00 5: + features_to_remove.append(i) + i += 1 + data_train = data_train[:, np.array([i for i in range(data_train.shape[1]) if i not in features_to_remove])] + data_test = data_test[:, np.array([i for i in range(data_test.shape[1]) if i not in features_to_remove])] + + N = data_train.shape[0] + N_validate = int(N*0.1) + data_validate = data_train[-N_validate:] + data_train = data_train[0:-N_validate] + + return data_train, data_validate, data_test + +data_train, data_validate, data_test = load_data_no_discrete_normalised_as_array("raw_physical_data/hepmass/") +print('data shape: ', data_train.shape) + +from margarine.clustered import clusterMAF +from margarine.maf import MAF +import math +from sklearn.cluster import KMeans + +try: + flow = MAF.load('physical_benchmarks/hep_maf.pkl') +except FileNotFoundError: + flow = MAF(data_train) + flow.train(10000, early_stop=True) + flow.save('physical_benchmarks/hep__maf.pkl') + +lps = flow.log_prob(data_test.astype(np.float32)) +mask = np.isfinite(lps) +print(np.mean(lps[mask])) +print(np.std(lps[mask]) / np.sqrt(len(lps[mask]))*2) + +try: + flow = clusterMAF.load('physical_benchmarks/hep_clustermaf.pkl') +except: + _ = clusterMAF(data_train) + nn = math.ceil(17424/_.cluster_number/2904) + print('number clusters: ', _.cluster_number, ' number_networks: ', nn) + + kmeans = KMeans(_.cluster_number, random_state=0) + labels = kmeans.fit(data_train).predict(data_train) + + flow = clusterMAF(data_train, cluster_number=_.cluster_number, cluster_labels=labels, number_networks=nn) + flow.train(10000, early_stop=True) + flow.save('physical_benchmarks/hep_clustermaf.pkl') + +print('number clusters: ', flow.cluster_number) +lps = flow.log_prob(data_test.astype(np.float32)) +mask = np.isfinite(lps) +print(np.mean(lps[mask])) +print(np.std(lps[mask]) / np.sqrt(len(lps[mask]))*2) diff --git a/read_miniboone.py b/read_miniboone.py new file mode 100644 index 0000000..38855e9 --- /dev/null +++ b/read_miniboone.py @@ -0,0 +1,86 @@ +import numpy as np + + +def load_data(root_path): + # NOTE: To remember how the pre-processing was done. + # data = pd.read_csv(root_path, names=[str(x) for x in range(50)], delim_whitespace=True) + # print data.head() + # data = data.as_matrix() + # # Remove some random outliers + # indices = (data[:, 0] < -100) + # data = data[~indices] + # + # i = 0 + # # Remove any features that have too many re-occuring real values. + # features_to_remove = [] + # for feature in data.T: + # c = Counter(feature) + # max_count = np.array([v for k, v in sorted(c.iteritems())])[0] + # if max_count > 5: + # features_to_remove.append(i) + # i += 1 + # data = data[:, np.array([i for i in range(data.shape[1]) if i not in features_to_remove])] + # np.save("~/data/miniboone/data.npy", data) + + data = np.load(root_path) + N_test = int(0.1*data.shape[0]) + data_test = data[-N_test:] + data = data[0:-N_test] + N_validate = int(0.1*data.shape[0]) + data_validate = data[-N_validate:] + data_train = data[0:-N_validate] + + return data_train, data_validate, data_test + + +def load_data_normalised(root_path): + + data_train, data_validate, data_test = load_data(root_path) + data = np.vstack((data_train, data_validate)) + mu = data.mean(axis=0) + s = data.std(axis=0) + data_train = (data_train - mu)/s + data_validate = (data_validate - mu)/s + data_test = (data_test - mu)/s + + return data_train, data_validate, data_test + +data_train, dv, data_test = load_data_normalised('raw_physical_data/miniboone/data.npy') +print('data shape: ', data_train.shape) + +from margarine.clustered import clusterMAF +from margarine.maf import MAF +import math +from sklearn.cluster import KMeans + +try: + flow = MAF.load('physical_benchmarks/miniboone_maf.pkl') +except FileNotFoundError: + flow = MAF(data_train) + flow.train(10000, early_stop=True) + flow.save('physical_benchmarks/miniboone_maf.pkl') + +lps = flow.log_prob(data_test.astype(np.float32)) +mask = np.isfinite(lps) +print(np.mean(lps[mask])) +print(np.std(lps[mask]) / np.sqrt(len(lps[mask]))*2) + +try: + flow = clusterMAF.load('physical_benchmarks/miniboone_clustermaf.pkl') +except: + _ = clusterMAF(data_train) + nn = math.ceil(17424/_.cluster_number/2904) + print('number clusters: ', _.cluster_number, ' number_networks: ', nn) + + kmeans = KMeans(_.cluster_number, random_state=0) + labels = kmeans.fit(data_train).predict(data_train) + + flow = clusterMAF(data_train, cluster_number=_.cluster_number, cluster_labels=labels, number_networks=nn) + flow.train(10000, early_stop=True) + flow.save('physical_benchmarks/miniboone_clustermaf.pkl') + +print('cluster number: ', flow.cluster_number) +lps = flow.log_prob(data_test.astype(np.float32)) +mask = np.isfinite(lps) +print(np.mean(lps[mask])) +print(np.std(lps[mask]) / np.sqrt(len(lps[mask]))*2) diff --git a/read_power.py b/read_power.py new file mode 100644 index 0000000..6439ed9 --- /dev/null +++ b/read_power.py @@ -0,0 +1,97 @@ +import numpy as np + + +def load_data(): + return np.load('raw_physical_data/power/data.npy') + + +def load_data_split_with_noise(): + + rng = np.random.RandomState(42) + + data = load_data() + rng.shuffle(data) + N = data.shape[0] + + data = np.delete(data, 3, axis=1) + data = np.delete(data, 1, axis=1) + ############################ + # Add noise + ############################ + # global_intensity_noise = 0.1*rng.rand(N, 1) + voltage_noise = 0.01*rng.rand(N, 1) + # grp_noise = 0.001*rng.rand(N, 1) + gap_noise = 0.001*rng.rand(N, 1) + sm_noise = rng.rand(N, 3) + time_noise = np.zeros((N, 1)) + # noise = np.hstack((gap_noise, grp_noise, voltage_noise, global_intensity_noise, sm_noise, time_noise)) + # noise = np.hstack((gap_noise, grp_noise, voltage_noise, sm_noise, time_noise)) + noise = np.hstack((gap_noise, voltage_noise, sm_noise, time_noise)) + data = data + noise + + N_test = int(0.1*data.shape[0]) + data_test = data[-N_test:] + data = data[0:-N_test] + N_validate = int(0.1*data.shape[0]) + data_validate = data[-N_validate:] + data_train = data[0:-N_validate] + + return data_train, data_validate, data_test + + +def load_data_normalised(): + + data_train, data_validate, data_test = load_data_split_with_noise() + data = np.vstack((data_train, data_validate)) + mu = data.mean(axis=0) + s = data.std(axis=0) + data_train = (data_train - mu)/s + data_validate = (data_validate - mu)/s + data_test = (data_test - mu)/s + + return data_train, data_validate, data_test + +np.random.seed(42) + +data_train, dv, data_test = load_data_normalised() +data_test = data_test[np.random.choice(len(data_test), 10000)] +data_train = data_train[np.random.choice(len(data_train), 25000)] +print('data shape: ', data_train.shape) + + +from margarine.clustered import clusterMAF +from margarine.maf import MAF +import math +from sklearn.cluster import KMeans + +try: + flow = MAF.load('physical_benchmarks/power_maf.pkl') +except FileNotFoundError: + flow = MAF(data_train) + flow.train(10000, early_stop=True) + flow.save('physical_benchmarks/power_maf.pkl') + +lps = flow.log_prob(data_test.astype(np.float32)) +mask = np.isfinite(lps) +print(np.mean(lps[mask])) +print(np.std(lps[mask]) / np.sqrt(len(lps[mask]))*2) + +try: + flow = clusterMAF.load('physical_benchmarks/power_clustermaf.pkl') +except: + _ = clusterMAF(data_train) + nn = math.ceil(17424/_.cluster_number/2904) + print('number clusters: ', _.cluster_number, ' number_networks: ', nn) + + kmeans = KMeans(_.cluster_number, random_state=0) + labels = kmeans.fit(data_train).predict(data_train) + + flow = clusterMAF(data_train, cluster_number=_.cluster_number, cluster_labels=labels, number_networks=nn) + flow.train(10000, early_stop=True) + flow.save('physical_benchmarks/power_clustermaf.pkl') + +print('cluster number: ', flow.cluster_number) +lps = flow.log_prob(data_test.astype(np.float32)) +mask = np.isfinite(lps) +print(np.mean(lps[mask])) +print(np.std(lps[mask]) / np.sqrt(len(lps[mask]))*2) \ No newline at end of file diff --git a/real_nvp_timing.py b/real_nvp_timing.py new file mode 100644 index 0000000..0f28eeb --- /dev/null +++ b/real_nvp_timing.py @@ -0,0 +1,217 @@ +import numpy as np +import matplotlib.pyplot as plt +from target_dists_stimper import TwoMoons, CircularGaussianMixture, RingMixture +import torch +from scipy.special import logsumexp +from tensorflow import keras +import normflows as nf +import larsflow as lf +from tqdm import tqdm +from matplotlib import rc +import matplotlib as mpl +from sklearn.model_selection import train_test_split +import os +import time + + +# figure formatting +mpl.rcParams['axes.prop_cycle'] = mpl.cycler('color', + ['ff7f00', '984ea3', '999999', '377eb8', + '4daf4a','f781bf', 'a65628', 'e41a1c', 'dede00']) +mpl.rcParams['text.usetex'] = True +#mpl.rcParams['text.latex.preamble'] = [ +# r'\usepackage{amsmath}', +# r'\usepackage{amssymb}'] +rc('font', family='serif') +rc('font', serif='cm') +rc('savefig', pad_inches=0.05) + + +def create_model(p, base='gauss'): + + """this function and the next are taken from Vincent Stimpers work + on realNVPs with resampled bases + https://github.com/VincentStimper/resampled-base-flows""" + + # Set up model + + # Define flows + K = 8 + torch.manual_seed(10) + + latent_size = 2 + b = torch.Tensor([1 if i % 2 == 0 else 0 for i in range(latent_size)]) + flows = [] + for i in range(K): + param_map = nf.nets.MLP([latent_size // 2, 17, 17, latent_size], init_zeros=True) + flows += [nf.flows.AffineCouplingBlock(param_map)] + flows += [nf.flows.Permute(latent_size, mode='swap')] + flows += [nf.flows.ActNorm(latent_size)] + + # Set prior and q0 + if base == 'resampled': + a = nf.nets.MLP([latent_size, 128, 128, 1], output_fn="sigmoid") + q0 = lf.distributions.ResampledGaussian(latent_size, a, 100, 0.1, trainable=False) + elif base == 'gaussian_mixture': + n_modes = 10 + q0 = nf.distributions.GaussianMixture(n_modes, latent_size, trainable=True, + loc=(np.random.rand(n_modes, latent_size) - 0.5) * 5, + scale=0.5 * np.ones((n_modes, latent_size))) + elif base == 'gauss': + q0 = nf.distributions.DiagGaussian(latent_size, trainable=False) + else: + raise NotImplementedError('This base distribution is not implemented.') + + # Construct flow model + model = lf.NormalizingFlow(q0=q0, flows=flows, p=p) + + # Move model on GPU if available + return model.to(device) + +def train(model, max_iter=20000, num_samples=2 ** 10, lr=1e-3, weight_decay=1e-3, + q0_weight_decay=1e-4): + """ + train() has been modified to include early stopping + This is the train for the realNVP from stimper et al. + """ + # Do mixed precision training + optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) + model.train() + + x = model.p.sample(num_samples) + w = np.ones(len(x)) + x_train, x_test, w_train, w_test = train_test_split(x, w, test_size=0.2) + + train_loss = [] + test_loss = [] + c = 0 + for it in tqdm(range(max_iter)): + + loss = model.forward_kld(x_train) + train_loss.append(loss) + test_loss.append(model.forward_kld(x_test).detach()) + + loss.backward() + optimizer.step() + + # Clear gradients + nf.utils.clear_grad(model) + + c += 1 + if it == 0: + minimum_loss = test_loss[-1] + minimum_epoch = it + minimum_model = None + else: + if test_loss[-1] < minimum_loss: + minimum_loss = test_loss[-1] + minimum_epoch = it + minimum_model = model + c = 0 + #print(i, minimum_epoch, minimum_loss.numpy(), test_loss[-1].numpy()) + if minimum_model: + if c == round((max_iter/100)*2): + print('Early stopped. Epochs used = ' + str(it)) + return minimum_model, it + +def mask_arr(arr): + return arr[np.isfinite(arr)], np.isfinite(arr) + +def calc_kl(samples, Flow, base): + """Calculate KL divergences for the MAFs""" + + target_logprob = base.log_prob(torch.from_numpy(samples)).numpy() + logprob = Flow.log_prob(samples) + logprob, mask = mask_arr(logprob) + target_logprob = target_logprob[mask] + logprob -= logsumexp(logprob) + target_logprob -= logsumexp(target_logprob) + delta_logprob = target_logprob - logprob + kldiv = np.mean(delta_logprob) + + kl_error = np.std(delta_logprob)/np.sqrt(len(delta_logprob)) + return kldiv, kl_error + +device = torch.device('cpu') + +# for the MAFs and piecewise MAFs +lr_schedule = keras.optimizers.schedules.ExponentialDecay( + initial_learning_rate=1e-3, + decay_steps=25, + decay_rate=0.9) + +nsample= 10000 +kl_nsample= 10000 +pEpochs = 20000 +epochs = 20000 + +base = 'benchmark_duplicates/' +if not os.path.exists(base): + os.mkdir(base) + +tm = TwoMoons() +times, epochs, cost = [], [], [] +for d in range(5): + s = time.time() + model = create_model(tm, 'resampled') + model, it = train(model) + e = time.time() + times.append(e-s) + epochs.append(it) + c = 0 + for parameter in model.parameters(): + c += len(parameter.flatten()) + cost.append(it*nsample*c) + +print('Two Moons') +print('Mean time: ', np.mean(times)) +print('Std time: ', np.std(times)/np.sqrt(len(times))) +print('Mean epochs: ', np.mean(epochs)) +print('Std epochs: ', np.std(epochs)/np.sqrt(len(epochs))) +print('Mean cost: ', np.mean(cost)) +print('Std cost: ', np.std(cost)/np.sqrt(len(cost))) + +cgm = CircularGaussianMixture() +times, epochs, cost = [], [], [] +for d in range(5): + s = time.time() + model = create_model(cgm, 'resampled') + model, it = train(model) + e = time.time() + times.append(e-s) + epochs.append(it) + c = 0 + for parameter in model.parameters(): + c += len(parameter.flatten()) + cost.append(it*nsample*c) + +print('Circle Gaussian Mixture') +print('Mean time: ', np.mean(times)) +print('Std time: ', np.std(times)/np.sqrt(len(times))) +print('Mean epochs: ', np.mean(epochs)) +print('Std epochs: ', np.std(epochs)/np.sqrt(len(epochs))) +print('Mean cost: ', np.mean(cost)) +print('Std cost: ', np.std(cost)/np.sqrt(len(cost))) + +rm = RingMixture() +times, epochs, cost = [], [], [] +for d in range(5): + s = time.time() + model = create_model(rm, 'resampled') + model, it = train(model) + e = time.time() + times.append(e-s) + epochs.append(it) + c = 0 + for parameter in model.parameters(): + c += len(parameter.flatten()) + cost.append(it*nsample*c) + +print('Ring Mixture') +print('Mean time: ', np.mean(times)) +print('Std time: ', np.std(times)/np.sqrt(len(times))) +print('Mean epochs: ', np.mean(epochs)) +print('Std epochs: ', np.std(epochs)/np.sqrt(len(epochs))) +print('Mean cost: ', np.mean(cost)) +print('Std cost: ', np.std(cost)/np.sqrt(len(cost))) + \ No newline at end of file diff --git a/timing/cgm_clustermaf_cost.txt b/timing/cgm_clustermaf_cost.txt new file mode 100644 index 0000000..f82737e --- /dev/null +++ b/timing/cgm_clustermaf_cost.txt @@ -0,0 +1,2 @@ +7.328381359200000000e+10 +6.762073771200000000e+10 diff --git a/timing/cgm_clustermaf_epochs.txt b/timing/cgm_clustermaf_epochs.txt new file mode 100644 index 0000000..0b2ac15 --- /dev/null +++ b/timing/cgm_clustermaf_epochs.txt @@ -0,0 +1,2 @@ +2.013600000000000000e+04 +1.872100000000000000e+04 diff --git a/timing/cgm_clustermaf_time.txt b/timing/cgm_clustermaf_time.txt new file mode 100644 index 0000000..bcc7e5d --- /dev/null +++ b/timing/cgm_clustermaf_time.txt @@ -0,0 +1,2 @@ +3.078082609176635742e+01 +2.756983566284179688e+01 diff --git a/timing/cgm_maf_cost.txt b/timing/cgm_maf_cost.txt new file mode 100644 index 0000000..8fffe99 --- /dev/null +++ b/timing/cgm_maf_cost.txt @@ -0,0 +1,2 @@ +2.435875200000000000e+11 +3.857673600000000000e+11 diff --git a/timing/cgm_maf_epochs.txt b/timing/cgm_maf_epochs.txt new file mode 100644 index 0000000..3f4c3d5 --- /dev/null +++ b/timing/cgm_maf_epochs.txt @@ -0,0 +1,2 @@ +1.398000000000000000e+03 +2.214000000000000000e+03 diff --git a/timing/cgm_maf_time.txt b/timing/cgm_maf_time.txt new file mode 100644 index 0000000..e35c410 --- /dev/null +++ b/timing/cgm_maf_time.txt @@ -0,0 +1,2 @@ +3.226764988899230957e+01 +4.878767609596252441e+01 diff --git a/timing/rm_clustermaf_cost.txt b/timing/rm_clustermaf_cost.txt new file mode 100644 index 0000000..4d12a66 --- /dev/null +++ b/timing/rm_clustermaf_cost.txt @@ -0,0 +1,2 @@ +7.168698820800000000e+10 +8.810138356800000000e+10 diff --git a/timing/rm_clustermaf_epochs.txt b/timing/rm_clustermaf_epochs.txt new file mode 100644 index 0000000..f1801a4 --- /dev/null +++ b/timing/rm_clustermaf_epochs.txt @@ -0,0 +1,2 @@ +3.894300000000000000e+04 +4.875500000000000000e+04 diff --git a/timing/rm_clustermaf_time.txt b/timing/rm_clustermaf_time.txt new file mode 100644 index 0000000..25cabce --- /dev/null +++ b/timing/rm_clustermaf_time.txt @@ -0,0 +1,2 @@ +4.430553531646728516e+01 +5.213389992713928223e+01 diff --git a/timing/rm_maf_cost.txt b/timing/rm_maf_cost.txt new file mode 100644 index 0000000..1a32596 --- /dev/null +++ b/timing/rm_maf_cost.txt @@ -0,0 +1,2 @@ +1.284148800000000000e+11 +1.782475200000000000e+11 diff --git a/timing/rm_maf_epochs.txt b/timing/rm_maf_epochs.txt new file mode 100644 index 0000000..40c30b2 --- /dev/null +++ b/timing/rm_maf_epochs.txt @@ -0,0 +1,2 @@ +7.370000000000000000e+02 +1.023000000000000000e+03 diff --git a/timing/rm_maf_time.txt b/timing/rm_maf_time.txt new file mode 100644 index 0000000..667bd9c --- /dev/null +++ b/timing/rm_maf_time.txt @@ -0,0 +1,2 @@ +1.807397699356079102e+01 +2.372441291809082031e+01 diff --git a/timing/tm_clustermaf_cost.txt b/timing/tm_clustermaf_cost.txt new file mode 100644 index 0000000..54c0d13 --- /dev/null +++ b/timing/tm_clustermaf_cost.txt @@ -0,0 +1,2 @@ +9.753297734400000000e+10 +1.020148715520000000e+11 diff --git a/timing/tm_clustermaf_epochs.txt b/timing/tm_clustermaf_epochs.txt new file mode 100644 index 0000000..12aebb5 --- /dev/null +++ b/timing/tm_clustermaf_epochs.txt @@ -0,0 +1,2 @@ +1.679000000000000000e+03 +1.756000000000000000e+03 diff --git a/timing/tm_clustermaf_time.txt b/timing/tm_clustermaf_time.txt new file mode 100644 index 0000000..aee645b --- /dev/null +++ b/timing/tm_clustermaf_time.txt @@ -0,0 +1,2 @@ +1.877429795265197754e+01 +1.905090618133544922e+01 diff --git a/timing/tm_maf_cost.txt b/timing/tm_maf_cost.txt new file mode 100644 index 0000000..6c0cc69 --- /dev/null +++ b/timing/tm_maf_cost.txt @@ -0,0 +1,2 @@ +7.513228800000000000e+11 +7.318080000000000000e+11 diff --git a/timing/tm_maf_epochs.txt b/timing/tm_maf_epochs.txt new file mode 100644 index 0000000..be2b434 --- /dev/null +++ b/timing/tm_maf_epochs.txt @@ -0,0 +1,2 @@ +4.312000000000000000e+03 +4.200000000000000000e+03 diff --git a/timing/tm_maf_time.txt b/timing/tm_maf_time.txt new file mode 100644 index 0000000..98fc7cc --- /dev/null +++ b/timing/tm_maf_time.txt @@ -0,0 +1,2 @@ +9.542246079444885254e+01 +9.178652691841125488e+01