From a3fa0c4c8a2fdb31a9278f9fcd86a019a9caef13 Mon Sep 17 00:00:00 2001 From: Jiarui Fang Date: Fri, 25 Oct 2024 10:32:33 +0800 Subject: [PATCH] upgrade diffusers to 0.31 (#316) --- benchmark/run.sh | 12 +++++++++--- examples/pixartalpha_example.py | 5 ++++- examples/run.sh | 9 +++++++-- examples/sd3_example.py | 3 ++- setup.py | 2 +- xfuser/__version__.py | 2 +- 6 files changed, 24 insertions(+), 9 deletions(-) diff --git a/benchmark/run.sh b/benchmark/run.sh index 4adc2ea5..8a4c8731 100644 --- a/benchmark/run.sh +++ b/benchmark/run.sh @@ -6,8 +6,11 @@ set -x # MODEL="/mnt/models/SD/stable-diffusion-3-medium-diffusers" # SCRIPT="./examples/sd3_example.py" -MODEL="/mnt/models/SD/HunyuanDiT-v1.2-Diffusers" -SCRIPT="./examples/hunyuandit_example.py" +# MODEL="/mnt/models/SD/HunyuanDiT-v1.2-Diffusers" +# SCRIPT="./examples/hunyuandit_example.py" + +MODEL="/cfs/dit/FLUX.1-dev/" +SCRIPT="./examples/flux_example.py" export PYTHONPATH=$PWD:$PYTHONPATH @@ -15,4 +18,7 @@ python benchmark/single_node_latency_test.py \ --model_id $MODEL \ --script $SCRIPT \ --sizes 1024 \ ---no_use_resolution_binning \ No newline at end of file +--no_use_resolution_binning \ +--num_inference_steps 28 \ +--no_use_cfg_parallel \ +--n_gpus 4 \ No newline at end of file diff --git a/examples/pixartalpha_example.py b/examples/pixartalpha_example.py index 4a333352..4d50467a 100644 --- a/examples/pixartalpha_example.py +++ b/examples/pixartalpha_example.py @@ -24,6 +24,7 @@ def main(): engine_config=engine_config, torch_dtype=torch.float16, ).to(f"cuda:{local_rank}") + model_memory = torch.cuda.max_memory_allocated(device=f"cuda:{local_rank}") pipe.prepare_run(input_config) torch.cuda.reset_peak_memory_stats() @@ -62,7 +63,9 @@ def main(): print(img_file) if get_world_group().rank == get_world_group().world_size - 1: - print(f"epoch time: {elapsed_time:.2f} sec, memory: {peak_memory/1e9} GB") + print( + f"epoch time: {elapsed_time:.2f} sec, model memory: {model_memory/1e9:.2f} GB, overall memory: 
{peak_memory/1e9:.2f} GB" + ) get_runtime_state().destory_distributed_env() diff --git a/examples/run.sh b/examples/run.sh index 6119ee95..c24f8c05 100644 --- a/examples/run.sh +++ b/examples/run.sh @@ -27,8 +27,10 @@ mkdir -p ./results # task args TASK_ARGS="--height 1024 --width 1024 --no_use_resolution_binning" + +# On 8 gpus: pp=2, ulysses=2, ring=2 (2*2*2=8); cfg_parallel is not enabled N_GPUS=8 -PARALLEL_ARGS="--ulysses_degree 1 --ring_degree 1 --pipefusion_parallel_degree 8" +PARALLEL_ARGS="--pipefusion_parallel_degree 2 --ulysses_degree 2 --ring_degree 2" # CFG_ARGS="--use_cfg_parallel" @@ -43,6 +45,9 @@ PARALLEL_ARGS="--ulysses_degree 1 --ring_degree 1 --pipefusion_parallel_degree 8 # Another compile option is `--use_onediff` which will use onediff's compiler. # COMPILE_FLAG="--use_torch_compile" + +# export CUDA_VISIBLE_DEVICES=4,5,6,7 + torchrun --nproc_per_node=$N_GPUS ./examples/$SCRIPT \ --model $MODEL_ID \ $PARALLEL_ARGS \ @@ -50,7 +55,7 @@ $TASK_ARGS \ $PIPEFUSION_ARGS \ $OUTPUT_ARGS \ --num_inference_steps $INFERENCE_STEP \ ---warmup_steps 0 \ +--warmup_steps 1 \ --prompt "brown dog laying on the ground with a metal bowl in front of him." 
\ $CFG_ARGS \ $PARALLLEL_VAE \ diff --git a/examples/sd3_example.py b/examples/sd3_example.py index 5ea5cc27..17de013a 100644 --- a/examples/sd3_example.py +++ b/examples/sd3_example.py @@ -66,8 +66,9 @@ def main(): if get_world_group().rank == get_world_group().world_size - 1: print( - f"{parallel_info} epoch time: {elapsed_time:.2f} sec, parameter memory: {parameter_peak_memory/1e9:.2f} GB, peak memory: {peak_memory/1e9:.2f} GB" + f"epoch time: {elapsed_time:.2f} sec, parameter memory: {parameter_peak_memory/1e9:.2f} GB, peak memory: {peak_memory/1e9:.2f} GB" ) + get_runtime_state().destory_distributed_env() diff --git a/setup.py b/setup.py index 0749593a..452ce123 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def get_cuda_version(): install_requires=[ "torch>=2.1.0", "accelerate>=0.33.0", - "diffusers@git+https://github.com/huggingface/diffusers", # NOTE: diffusers>=0.31.0.dev is necessary for CogVideoX and Flux + "diffusers>=0.31", # NOTE: diffusers>=0.31.0 is necessary for CogVideoX and Flux "transformers>=4.39.1", "sentencepiece>=0.1.99", "beautifulsoup4>=4.12.3", diff --git a/xfuser/__version__.py b/xfuser/__version__.py index f9aa3e11..e19434e2 100644 --- a/xfuser/__version__.py +++ b/xfuser/__version__.py @@ -1 +1 @@ -__version__ = "0.3.2" +__version__ = "0.3.3"