# projectdl/model/ddpm/config.py at main · UntitledNotebook/projectdl · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# config.py
import time

# -- Data Configuration --
# Dataset location, per-sample layout, and data-loader settings.
data_config = dict(
    # Absolute path to the .npy data file.
    data_file_path="/root/autodl-tmp/projectdl/data/block_ids_20000.npy",
    # Channel count of x_t; x_self_cond uses the same count when it is enabled.
    bit_representation_length=5,
    # Three spatial extents per sample (axis order not shown here — TODO confirm).
    image_spatial_shape=(32, 32, 32),
    # Loader settings.
    batch_size=48,
    num_workers=4,
    shuffle_data=True,
)

# -- Diffusion Process Configuration --
# Settings for the bit-diffusion process.
diffusion_config = dict(
    analog_bit_scale=1.0,
    # Master switch that turns on self-conditioning behaviour in BitDiffusion;
    # model_config also reads it to size the network's input channels.
    self_condition_diffusion_process=True,
    # Presumably noise-schedule parameters — TODO confirm against BitDiffusion.
    gamma_ns=2e-4,
    gamma_ds=2.5e-4,
)

# -- Model (UNet3D) Configuration --
# Architecture settings; the channel counts are derived from data_config and
# diffusion_config, so both must already be defined when this runs.
model_config = dict(
    # One set of bit channels for x_t, doubled when self-conditioning is on
    # (the extra set carries x_self_cond).
    input_channels=data_config["bit_representation_length"]
    * (2 if diffusion_config["self_condition_diffusion_process"] else 1),
    model_channels=64,
    # The output always has exactly one set of bit channels.
    output_channels=data_config["bit_representation_length"],
    channel_mults=(1, 2, 4),
    num_residual_blocks_per_stage=2,
    # Time-embedding sizes.
    time_embedding_dim=128,
    time_mlp_hidden_dim=512,
    time_final_emb_dim=512,
    # Attention settings; the indices presumably select stages of
    # channel_mults — TODO confirm against the UNet3D implementation.
    attention_resolutions_indices=(1, 2),
    attention_type="linear",
    attention_heads=8,
    dropout=0.1,
    groups=8,
    initial_conv_kernel_size=3,
)


# -- Training Configuration --
# Optimiser, schedule, output, logging, and sampling settings for training.
train_config = dict(
    num_train_epochs=20,
    # Optimiser hyper-parameters.
    learning_rate=1e-4,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_weight_decay=1e-6,
    adam_epsilon=1e-8,
    # Learning-rate schedule.
    lr_scheduler_type="cosine",
    lr_warmup_steps=500,
    gradient_accumulation_steps=1,
    mixed_precision="no",
    # The timestamp is taken once, when this module is first imported.
    # NOTE(review): separate processes that each import this module may get
    # different timestamps — confirm only one process's value is used.
    output_dir="outputs/" + time.strftime("%Y%m%d-%H%M%S"),
    seed=42,
    # Weights & Biases logging.
    log_with_wandb=True,
    wandb_project_name="projectdl",
    wandb_entity_name=None,
    wandb_group="ddpm",
    # Sampling and sample logging during training.
    sampling_steps_train=1000,
    time_difference_td=0.0,
    num_samples_to_log=4,
    log_samples_every_n_steps=5000,
    ema_decay=0.9999,
)