From 20c006c35475c564e5c0803f64da0e7a1bac8f84 Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 1 Nov 2024 04:16:55 +0000 Subject: [PATCH 01/11] update sd readme --- ppdiffusers/examples/stable_diffusion/README.md | 2 +- ppdiffusers/examples/stable_diffusion/requirements.txt | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/ppdiffusers/examples/stable_diffusion/README.md b/ppdiffusers/examples/stable_diffusion/README.md index 4bde9bd0b..f95b580ad 100644 --- a/ppdiffusers/examples/stable_diffusion/README.md +++ b/ppdiffusers/examples/stable_diffusion/README.md @@ -41,7 +41,7 @@ cd PaddleMIX/ppdiffusers/examples/stable_diffusion pip install -r requirements.txt ``` -> 注:本模型训练与推理需要依赖 CUDA 11.2 及以上版本,如果本地机器不符合要求,建议前往 [AI Studio](https://aistudio.baidu.com/index) 进行模型训练、推理任务。 +> 注:本模型训练与推理需要依赖 CUDA 11.2 及以上版本,如果本地机器不符合要求,建议前往 [AI Studio](https://aistudio.baidu.com/index) 进行模型训练、推理任务。推荐使用Linux系统,Windows系统未经过系统测试。 ## 3. 数据准备 diff --git a/ppdiffusers/examples/stable_diffusion/requirements.txt b/ppdiffusers/examples/stable_diffusion/requirements.txt index 06a6c755c..c3f48edc2 100644 --- a/ppdiffusers/examples/stable_diffusion/requirements.txt +++ b/ppdiffusers/examples/stable_diffusion/requirements.txt @@ -1,6 +1,3 @@ -paddlenlp>=2.6.1 -ppdiffusers>=0.19.3 fastcore visualdl -Pillow safetensors \ No newline at end of file From 4d50051b031674088838cc3130a943d5f98b4646 Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 11:41:48 +0800 Subject: [PATCH 02/11] add test_tipc/dygraph/dp/stable_diffusion_3 --- ...e_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh | 27 +++ ...e_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh | 27 +++ ...e_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh | 27 +++ ...diffusion_3-dreambooth_lora_bs1_fp16_DP.sh | 27 +++ ...diffusion_3-dreambooth_lora_bs4_bf16_DP.sh | 27 +++ ...diffusion_3-dreambooth_lora_bs4_fp16_DP.sh | 27 +++ ...e_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh | 27 +++ ...e_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh | 27 +++ 
...e_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh | 27 +++ ...diffusion_3-dreambooth_lora_bs1_fp16_DP.sh | 27 +++ ...diffusion_3-dreambooth_lora_bs4_bf16_DP.sh | 27 +++ ...diffusion_3-dreambooth_lora_bs4_fp16_DP.sh | 27 +++ .../benchmark_common/analysis_log.py | 155 ++++++++++++++ .../benchmark_common/prepare.sh | 66 ++++++ .../benchmark_common/run_benchmark.sh | 196 ++++++++++++++++++ 15 files changed, 741 insertions(+) create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py create mode 100644 
tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh new file mode 100644 index 000000000..998f2edbc --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=1
+fp_item=fp16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
new file mode 100644
index 000000000..f2a24ab7e
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=4
+fp_item=bf16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
new file mode 100644
index 000000000..1692f8a5d
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=4
+fp_item=fp16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
new file mode 100644
index 000000000..e69f24dec
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=1
+fp_item=fp16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
new file mode 100644
index 000000000..4fb089122
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=4
+fp_item=bf16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
new file mode 100644
index 000000000..c54b80e7e
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=4
+fp_item=fp16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
new file mode 100644
index 000000000..1cfe3ce76
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=1
+fp_item=fp16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
new file mode 100644
index 000000000..3c68cfbbe
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=4
+fp_item=bf16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
new file mode 100644
index 000000000..94025ce08
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=4
+fp_item=fp16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
new file mode 100644
index 000000000..88eccc249
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=1
+fp_item=fp16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
new file mode 100644
index 000000000..a8a1b5ea4
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=4
+fp_item=bf16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
new file mode 100644
index 000000000..82576d762
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=4
+fp_item=fp16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the pretrained weights and sample data and install dependencies (prepare.sh).
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Launch the benchmark; run_benchmark.sh reads these seven values as positional arguments $1..$7.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py
new file mode 100644
index 000000000..7606e8adc
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py
@@ -0,0 +1,173 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Summarize a Stable Diffusion 3 DreamBooth benchmark log as a JSON record."""
+
+import json
+import os
+import re
+import sys
+from pdb import line_prefix
+
+import numpy as np
+from numpy import mean, var
+
+# tqdm-style progress lines carry their rate as "<x>it/s" normally, and as
+# "<x>s/it" once a single step takes longer than one second.
+_IT_PER_SEC_RE = re.compile(r"(\d+\.\d+)it/s")
+_SEC_PER_IT_RE = re.compile(r"(\d+\.\d+)s/it")
+_LOSS_RE = re.compile(r"loss=(\d+\.\d+)")
+
+# Leading throughput samples discarded as warm-up before averaging.
+_WARMUP_SAMPLES = 4
+
+
+def _ewma(data, alpha):
+    """Return the exponentially weighted moving average of ``data`` as a list."""
+    smoothed = []
+    for value in data:
+        if smoothed:
+            value = alpha * value + (1 - alpha) * smoothed[-1]
+        smoothed.append(value)
+    return smoothed
+
+
+class TimeAnalyzer(object):
+    """Extract throughput and loss values from a tqdm-style training log."""
+
+    def __init__(self, filename, keyword=None, loss_keyword=None):
+        """Store the log path and the line filter.
+
+        Args:
+            filename: Path of the training log to parse.
+            keyword: Substring a line must contain to be parsed.
+            loss_keyword: Stored for interface compatibility; not used by get_ips.
+
+        Raises:
+            Exception: If ``filename`` or ``keyword`` is None.
+        """
+        if filename is None:
+            raise Exception("Please specify the filename!")
+
+        if keyword is None:
+            raise Exception("Please specify the keyword!")
+
+        self.filename = filename
+        self.keyword = keyword
+        self.loss_keyword = loss_keyword
+
+    def get_ips(self):
+        """Parse the log and return ``(iterations_per_second, last_loss, smoothed_loss)``.
+
+        The throughput is the mean of the parsed rates after dropping the first
+        ``_WARMUP_SAMPLES`` values; if the log holds no more samples than that,
+        all of them are averaged, and 0.0 is returned when there are none.
+        Both loss values are -1 when no loss was found.
+        """
+        ips_list = []
+        loss_list = []
+        # Progress bars contain non-ASCII block characters; decode as UTF-8 and
+        # replace undecodable bytes so a stray byte cannot abort the analysis.
+        with open(self.filename, "r", encoding="utf8", errors="replace") as f_object:
+            # splitlines() also breaks on "\r", which progress bars use to redraw.
+            lines = f_object.read().splitlines()
+
+        for line in lines:
+            if self.keyword not in line:
+                continue
+
+            ips_match = _IT_PER_SEC_RE.search(line)
+            if ips_match:
+                ips_list.append(float(ips_match.group(1)))
+            else:
+                inverse_match = _SEC_PER_IT_RE.search(line)
+                if inverse_match and float(inverse_match.group(1)) > 0:
+                    ips_list.append(1.0 / float(inverse_match.group(1)))
+
+            loss_match = _LOSS_RE.search(line)
+            if loss_match:
+                loss_list.append(float(loss_match.group(1)))
+
+        loss_value = loss_list[-1] if loss_list else -1
+        smoothed_loss = _ewma(loss_list, 0.9)[-1] if loss_list else -1
+
+        steady = ips_list[_WARMUP_SAMPLES:] or ips_list
+        if not steady:
+            print("no throughput samples found in {}".format(self.filename))
+            return 0.0, loss_value, smoothed_loss
+        return float(mean(steady)), loss_value, smoothed_loss
+
+
+def analyze(model_item, log_file, res_log_file, device_num, bs, fp_item):
+    """Write the benchmark summary for one run to ``res_log_file`` as JSON.
+
+    Args:
+        model_item: Model item name, used as the prefix of ``model_name``.
+        log_file: Training log to parse.
+        res_log_file: Output path of the JSON summary.
+        device_num: Device layout such as "N1C8"; its last number is used as the card count.
+        bs: Batch size; the parsed rate is multiplied by it and by the card count.
+        fp_item: Precision tag, e.g. "fp16" or "bf16".
+    """
+    analyzer = TimeAnalyzer(log_file, "Steps:", None)
+    ips, convergence_value, smoothed_value = analyzer.get_ips()
+    ips = round(ips, 3)
+    ngpus = int(re.findall(r"\d+", device_num)[-1])
+    batch_size = int(re.findall(r"\d+", str(bs))[-1])
+    print("----ips: ", ips, "ngpus", ngpus, "batch_size", batch_size)
+    # Scale the per-iteration rate to the "sample/sec" unit reported below.
+    ips *= batch_size
+    ips *= ngpus
+    run_mode = "DP"
+
+    model_name = model_item + "_" + "bs" + str(bs) + "_" + fp_item + "_" + run_mode
+    info = {
+        "model_branch": os.getenv("model_branch"),
+        "model_commit": os.getenv("model_commit"),
+        "model_name": model_name,
+        "batch_size": bs,
+        "fp_item": fp_item,
+        "run_mode": run_mode,
+        "convergence_value": convergence_value,
+        "smoothed_value": smoothed_value,
+        "convergence_key": "",
+        "ips": ips,
+        "speed_unit": "sample/sec",
+        "device_num": device_num,
+        "model_run_time": os.getenv("model_run_time"),
+        "frame_commit": os.getenv("frame_commit", ""),
+        "frame_version": os.getenv("frame_version"),
+    }
+    json_info = json.dumps(info)
+    print(json_info)
+    with open(res_log_file, "w") as of:
+        of.write(json_info)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 7:
+        print(
+            "Usage: " + sys.argv[0]
+            + " model_item path/to/log/file path/to/res/log/file device_num batch_size fp_item"
+        )
+        sys.exit(1)
+
+    model_item = sys.argv[1]
+    log_file = sys.argv[2]
+    res_log_file = sys.argv[3]
+    device_num = sys.argv[4]
+    bs = int(sys.argv[5])
+    fp_item = sys.argv[6]
+
+    analyze(model_item, log_file, res_log_file, device_num, bs, fp_item)
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
new file mode 100644
index 000000000..1df6ab524
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
@@ -0,0 +1,60 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Prepare the Stable Diffusion 3 DreamBooth benchmark: fetch the initial weights
+# and the sample "dog" images, then (unless RUN_SETUP is not "true") install the
+# Python dependencies and the in-tree ppdiffusers.
+
+if [ ! -d "stable-diffusion-3-medium-diffusers-paddle-init" ]; then
+    echo "Downloading stable-diffusion-3-medium-diffusers-paddle-init.tar.gz..."
+    wget https://bj.bcebos.com/paddlenlp/models/community/westfish/sd3_benchmark/stable-diffusion-3-medium-diffusers-paddle-init.tar.gz || { echo "Failed to download stable-diffusion-3-medium-diffusers-paddle-init.tar.gz"; exit 1; }
+    echo "Extracting stable-diffusion-3-medium-diffusers-paddle-init.tar.gz..."
+    tar -zxvf stable-diffusion-3-medium-diffusers-paddle-init.tar.gz || { echo "Failed to extract stable-diffusion-3-medium-diffusers-paddle-init.tar.gz"; exit 1; }
+else
+    echo "Directory stable-diffusion-3-medium-diffusers-paddle-init already exists. Skipping download."
+fi
+
+if [ ! -d "dog" ]; then
+    echo "Downloading dog.zip..."
+    wget https://paddlenlp.bj.bcebos.com/models/community/westfish/develop-sdxl/dog.zip || { echo "Failed to download dog.zip"; exit 1; }
+    echo "Unzipping dog.zip..."
+    unzip dog.zip || { echo "Failed to unzip dog.zip"; exit 1; }
+else
+    echo "Directory dog already exists. Skipping download."
+fi
+
+RUN_SETUP=${RUN_SETUP:-"true"}
+if [ "$RUN_SETUP" = "true" ]; then
+    echo "Running setup and installation steps..."
+
+    export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
+    python -m pip install --upgrade pip -i https://mirror.baidu.com/pypi/simple
+    python -m pip install einops -i https://mirror.baidu.com/pypi/simple
+    python -m pip install -r ../requirements.txt
+    python -m pip install --upgrade pybind11 regex sentencepiece tqdm visualdl attrdict easydict pyyaml paddlesde -i https://mirror.baidu.com/pypi/simple
+    python -m pip install paddlenlp==3.0.0b2
+    python -m pip install huggingface-hub==0.23.0
+
+    # uninstall ppdiffusers and install develop paddlemix
+    python -m pip uninstall -y ppdiffusers
+    cd ../ppdiffusers/ || exit 1  # never run "pip install -e ." from the wrong directory
+    python -m pip install -e .
+    cd -
+    cd ../ppdiffusers/examples/dreambooth || exit 1
+    python -m pip install -r requirements_sd3.txt
+    cd -
+    python -m pip list
+else
+    echo "fast mode, skipping setup and installation steps as RUN_SETUP is set to false."
+fi
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
new file mode 100644
index 000000000..337ec9082
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
@@ -0,0 +1,196 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test training benchmark for a model.
+# Usage:bash benchmark/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} +function _set_params(){ + model_item=${1:-"stable_diffusion_3-dreambooth_ft"} # (必选) 模型 item |fastscnn|segformer_b0| ocrnet_hrnetw48 + base_batch_size=${2:-"1"} # (必选) 如果是静态图单进程,则表示每张卡上的BS,需在训练时*卡数 + fp_item=${3:-"fp32"} # (必选) fp32|fp16|bf16 + run_mode=${4:-"DP"} # (必选) MP模型并行|DP数据并行|PP流水线并行|混合并行DP1-MP1-PP1|DP1-MP4-PP1 + device_num=${5:-"N1C1"} # (必选) 使用的卡数量,N1C1|N1C8|N4C32 (4机32卡) + profiling=${PROFILING:-"false"} # (必选) Profiling 开关,默认关闭,通过全局变量传递 + + model_repo="PaddleMIX" # (必选) 模型套件的名字 + speed_unit="sample/sec" # (必选)速度指标单位 + skip_steps=0 # (必选)解析日志,跳过模型前几个性能不稳定的step + keyword="ips:" # (必选)解析日志,筛选出性能数据所在行的关键字 + convergence_key="loss:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:" + max_iter=${6:-"20"} # (可选)需保证模型执行时间在5分钟内,需要修改代码提前中断的直接提PR 合入套件 或是max_epoch + num_workers=${7:-"5"} # (可选) + is_large_model=False # (可选)普通模型默认为False,如果添加大模型且只取一条ips设置为True + + # 以下为通用执行命令,无特殊可不用修改 + model_name=${model_item}_bs${base_batch_size}_${fp_item}_${run_mode} # (必填) 且格式不要改动,与竞品名称对齐 + device=${CUDA_VISIBLE_DEVICES//,/ } + arr=(${device}) + num_gpu_devices=${#arr[*]} + run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (必填) TRAIN_LOG_DIR benchmark框架设置该参数为全局变量 + profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (必填) PROFILING_LOG_DIR benchmark框架设置该参数为全局变量 + speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)} + + train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log + profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling + speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed +} + +function _train(){ + batch_size=${base_batch_size} # 如果模型跑多卡单进程时,请在_train函数中计算出多卡需要的bs + echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}" + if [ ${profiling} = "true" ];then + 
add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\"" + log_file=${profiling_log_file} + else + add_options="" + log_file=${train_log_file} + fi + + # 原生动态图 + export FLAG_FUSED_LINEAR=0 + export FLAGS_conv_workspace_size_limit=4096 + + # # 是否开启 ema + # export FLAG_USE_EMA=0 + # # 是否开启 recompute + # export FLAG_RECOMPUTE=1 + # # 是否开启 xformers + # export FLAG_XFORMERS=1 + export FLAGS_cudnn_deterministic=True + env |grep FLAG + + if [ ${fp_item} = "fp32" ]; then + fp_item_cmd="no" + else + fp_item_cmd=${fp_item} + fi + echo "------------" + ls; + echo "------------" + + # model_path=../ppdiffusers/examples/dreambooth/ + # cd ${model_path} + + if [ ${model_item} = "stable_diffusion_3-dreambooth_ft" ];then + train_cmd=" + ../ppdiffusers/examples/dreambooth/train_dreambooth_sd3.py \ + --pretrained_model_name_or_path=stable-diffusion-3-medium-diffusers-paddle-init \ + --instance_data_dir=dog \ + --output_dir=trained-sd3 \ + --mixed_precision=${fp_item_cmd} \ + --instance_prompt=a-photo-of-sks-dog \ + --resolution=512 \ + --train_batch_size=${batch_size} \ + --gradient_accumulation_steps=4 \ + --learning_rate=5e-5 \ + --report_to=tensorboard \ + --lr_scheduler=constant \ + --lr_warmup_steps=0 \ + --max_train_steps=${max_iter} \ + --validation_prompt=A-photo-of-sks-dog-in-a-bucket \ + --validation_epochs=20 \ + --num_validation_images 1 \ + --seed=0 \ + --checkpointing_steps=10000 + " + else + export USE_PEFT_BACKEND=True + train_cmd=" + ../ppdiffusers/examples/dreambooth/train_dreambooth_lora_sd3.py \ + --pretrained_model_name_or_path=stable-diffusion-3-medium-diffusers-paddle-init \ + --instance_data_dir=dog \ + --output_dir=trained-sd3-lora \ + --mixed_precision=${fp_item_cmd} \ + --instance_prompt=a-photo-of-sks-dog \ + --resolution=512 \ + --train_batch_size=${batch_size} \ + --gradient_accumulation_steps=4 \ + --learning_rate=5e-5 \ + --report_to=tensorboard \ + --lr_scheduler=constant \ + 
--lr_warmup_steps=0 \ + --max_train_steps=${max_iter} \ + --validation_prompt=A-photo-of-sks-dog-in-a-bucket \ + --validation_epochs=20 \ + --num_validation_images 1 \ + --seed=0 \ + --checkpointing_steps=10000 + " + fi + + # 以下为通用执行命令,无特殊可不用修改 + case ${run_mode} in + DP) if [[ ${device_num} = "N1C1" ]];then + echo "run ${run_mode} " + train_cmd="python -u ${train_cmd}" + else + rm -rf ./mylog # 注意执行前删掉log目录 + train_cmd="python -u -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES \ + ${train_cmd}" + fi + ;; + DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1" ;; + *) echo "choose run_mode "; exit 1; + esac + + echo "train_cmd: ${train_cmd} log_file: ${log_file}" + RUN_SETUP=${RUN_SETUP:-"true"} + if [ "$RUN_SETUP" = "true" ]; then + timeout 30m ${train_cmd} > ${log_file} 2>&1 + else + echo "fast mode, only run 3m" + timeout 3m ${train_cmd} > ${log_file} 2>&1 + fi + # eval ${train_cmd} + # eval "timeout 30m ${train_cmd} > ${log_file} 2>&1" + if [ $? -ne 0 ];then + echo -e "${model_name}, FAIL" + else + echo -e "${model_name}, SUCCESS" + fi + # kill -9 `ps -ef|grep 'python'|awk '{print $2}'` + + if [ ${device_num} != "N1C1" -a -d mylog ]; then + rm ${log_file} + cp mylog/workerlog.0 ${log_file} + fi + echo ${train_cmd} >> ${log_file} + cat ${log_file} +} + +function _analysis_log(){ + # cd - + analysis_log_cmd="python test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py \ + ${model_item} ${log_file} ${speed_log_file} ${device_num} ${base_batch_size} ${fp_item}" + echo ${analysis_log_cmd} + eval ${analysis_log_cmd} +} + +_set_params $@ +str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`) +export frame_version=${str_tmp%%.post*} +export frame_commit=$(echo `python -c "import paddle;print(paddle.version.commit)"`) +export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3` +export model_commit=$(git log|head -n1|awk '{print $2}') +echo "---------frame_version is ${frame_version}" +echo 
"---------Paddle commit is ${frame_commit}" +echo "---------Model commit is ${model_commit}" +echo "---------model_branch is ${model_branch}" + +job_bt=`date '+%Y%m%d%H%M%S'` +_train +job_et=`date '+%Y%m%d%H%M%S'` +export model_run_time=$((${job_et}-${job_bt})) +_analysis_log From 4f080451773b8468dafa280ba0de9986dc12015d Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 14:20:39 +0800 Subject: [PATCH 03/11] update --- ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py | 2 +- ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py b/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py index 907a92776..80debd760 100644 --- a/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py +++ b/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py @@ -165,7 +165,7 @@ def reset_lora_parameters(self, adapter_name, init_lora_weights): else: raise ValueError(f"Unknown initialization {init_lora_weights=}") nn.init.zeros_(self.lora_B[adapter_name].weight) - if adapter_name in self.lora_embedding_A.keys(): + if adapter_name in dict(self.lora_embedding_A).keys(): # initialize a the same way as the default for nn.linear and b to zero nn.init.zeros_(self.lora_embedding_A[adapter_name]) nn.init.normal_(self.lora_embedding_B[adapter_name]) diff --git a/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py b/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py index 66467f94c..5f643a017 100644 --- a/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py +++ b/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py @@ -416,6 +416,8 @@ def set_adapter(self, adapter_names: str | list[str]) -> None: # Deactivate grads on the inactive adapter and activate grads on the active adapter for layer_name in self.adapter_layer_names: module_dict = getattr(self, layer_name) + if isinstance(module_dict, paddle.nn.ParameterDict): + module_dict = dict(module_dict) for key, layer in module_dict.items(): 
if key in adapter_names: # Note: It is possible that not a single layer is called with requires_grad_(True) here. This may From cc24c8ff8f82cd82ca46d14131f9c59a520de628 Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 14:23:32 +0800 Subject: [PATCH 04/11] update --- .../dp/stable_diffusion_3/benchmark_common/prepare.sh | 7 ++++--- .../stable_diffusion_3/benchmark_common/run_benchmark.sh | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh index 1df6ab524..800f78eba 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh @@ -45,10 +45,11 @@ if [ "$RUN_SETUP" = "true" ]; then echo "Running setup and installation steps..." export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH - python -m pip install --upgrade pip -i https://mirror.baidu.com/pypi/simple - python -m pip install einops -i https://mirror.baidu.com/pypi/simple + python -m pip install --upgrade pip + # python -m pip install paddlepaddle-gpu==3.0.0b2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ + python -m pip install einops python -m pip install -r ../requirements.txt - python -m pip install --upgrade pybind11 regex sentencepiece tqdm visualdl attrdict easydict pyyaml paddlesde -i https://mirror.baidu.com/pypi/simple + python -m pip install --upgrade pybind11 regex sentencepiece tqdm visualdl attrdict easydict pyyaml paddlesde python -m pip install paddlenlp==3.0.0b2 python -m pip install huggingface-hub==0.23.0 diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh index 337ec9082..6399a7d77 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh +++ 
b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh @@ -100,7 +100,7 @@ function _train(){ --lr_warmup_steps=0 \ --max_train_steps=${max_iter} \ --validation_prompt=A-photo-of-sks-dog-in-a-bucket \ - --validation_epochs=20 \ + --validation_epochs=100 \ --num_validation_images 1 \ --seed=0 \ --checkpointing_steps=10000 @@ -123,7 +123,7 @@ function _train(){ --lr_warmup_steps=0 \ --max_train_steps=${max_iter} \ --validation_prompt=A-photo-of-sks-dog-in-a-bucket \ - --validation_epochs=20 \ + --validation_epochs=100 \ --num_validation_images 1 \ --seed=0 \ --checkpointing_steps=10000 @@ -146,8 +146,8 @@ function _train(){ esac echo "train_cmd: ${train_cmd} log_file: ${log_file}" - RUN_SETUP=${RUN_SETUP:-"true"} - if [ "$RUN_SETUP" = "true" ]; then + RUN_SLOW=${RUN_SLOW:-"true"} + if [ "$RUN_SLOW" = "true" ]; then timeout 30m ${train_cmd} > ${log_file} 2>&1 else echo "fast mode, only run 3m" From 8e37d7fcab214db008def01ab7becd4614004e67 Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 15:05:57 +0800 Subject: [PATCH 05/11] update --- ppdiffusers/ppdiffusers/loaders/deprecate.py | 4 ++-- ppdiffusers/ppdiffusers/models/attention_processor.py | 8 ++++---- ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ppdiffusers/ppdiffusers/loaders/deprecate.py b/ppdiffusers/ppdiffusers/loaders/deprecate.py index 2b88f5aeb..e1e72424b 100644 --- a/ppdiffusers/ppdiffusers/loaders/deprecate.py +++ b/ppdiffusers/ppdiffusers/loaders/deprecate.py @@ -19,7 +19,7 @@ def text_encoder_lora_state_dict(text_encoder): deprecate( "text_encoder_load_state_dict in `models`", - "0.27.0", + "0.45.0", "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. 
See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.", ) state_dict = {} @@ -45,7 +45,7 @@ def text_encoder_lora_state_dict(text_encoder): def text_encoder_attn_modules(text_encoder): deprecate( "text_encoder_attn_modules in `models`", - "0.27.0", + "0.45.0", "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.", ) from ppdiffusers.transformers import CLIPTextModel, CLIPTextModelWithProjection diff --git a/ppdiffusers/ppdiffusers/models/attention_processor.py b/ppdiffusers/ppdiffusers/models/attention_processor.py index c93c55ae6..d6d878461 100644 --- a/ppdiffusers/ppdiffusers/models/attention_processor.py +++ b/ppdiffusers/ppdiffusers/models/attention_processor.py @@ -362,7 +362,7 @@ def set_processor(self, processor: "AttnProcessor", _remove_lora: bool = False) if not USE_PEFT_BACKEND and hasattr(self, "processor") and _remove_lora and self.to_q.lora_layer is not None: deprecate( "set_processor to offload LoRA", - "0.26.0", + "0.45.0", "In detail, removing LoRA layers via calling `set_default_attn_processor` is deprecated. Please make sure to call `pipe.unload_lora_weights()` instead.", ) # TODO(Patrick, Sayak) - this can be deprecated once PEFT LoRA integration is complete @@ -1635,7 +1635,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg self_cls_name = self.__class__.__name__ deprecate( self_cls_name, - "0.26.0", + "0.45.0", ( f"Make sure use {self_cls_name[4:]} instead by setting" "LoRA layers to `self.{to_q,to_k,to_v,to_out[0]}.lora_layer` respectively. 
This will be done automatically when using" @@ -1714,7 +1714,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg self_cls_name = self.__class__.__name__ deprecate( self_cls_name, - "0.26.0", + "0.45.0", ( f"Make sure use {self_cls_name[4:]} instead by setting" "LoRA layers to `self.{to_q,to_k,to_v,add_k_proj,add_v_proj,to_out[0]}.lora_layer` respectively. This will be done automatically when using" @@ -1773,7 +1773,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg self_cls_name = self.__class__.__name__ deprecate( self_cls_name, - "0.26.0", + "0.45.0", ( f"Make sure use {self_cls_name[4:]} instead by setting" "LoRA layers to `self.{to_q,to_k,to_v,add_k_proj,add_v_proj,to_out[0]}.lora_layer` respectively. This will be done automatically when using" diff --git a/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py b/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py index 1ba2d67ab..ec9c5e4c4 100644 --- a/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py +++ b/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py @@ -688,10 +688,10 @@ def to(self, *args, **kwargs): paddle_dtype = kwargs.pop("paddle_dtype", None) if paddle_dtype is not None: - deprecate("paddle_dtype", "0.35.0", "") + deprecate("paddle_dtype", "0.45.0", "") paddle_device = kwargs.pop("paddle_device", None) if paddle_device is not None: - deprecate("paddle_device", "0.35.0", "") + deprecate("paddle_device", "0.45.0", "") dtype_kwarg = kwargs.pop("dtype", None) device_kwarg = kwargs.pop("device", None) From 910d9f4a2b5a2028c1e8a4174fe0fe4f09cfb1bf Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 16:18:39 +0800 Subject: [PATCH 06/11] update --- ...e_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh | 27 ------------------- ...diffusion_3-dreambooth_lora_bs4_bf16_DP.sh | 27 ------------------- ...e_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh | 27 ------------------- ...diffusion_3-dreambooth_lora_bs4_bf16_DP.sh | 27 
------------------- 4 files changed, 108 deletions(-) delete mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh delete mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh delete mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh delete mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh deleted file mode 100644 index f2a24ab7e..000000000 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -model_item=stable_diffusion_3-dreambooth_ft -model=stable_diffusion_3 -bs_item=4 -fp_item=bf16 -run_mode=DP -device_num=N1C1 -max_iter=1000 -num_workers=0 - -# get data -bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh -# run -bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh deleted file mode 100644 index 4fb089122..000000000 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -model_item=stable_diffusion_3-dreambooth_lora -model=stable_diffusion_3 -bs_item=4 -fp_item=bf16 -run_mode=DP -device_num=N1C1 -max_iter=1000 -num_workers=0 - -# get data -bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh -# run -bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh deleted file mode 100644 index 3c68cfbbe..000000000 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -model_item=stable_diffusion_3-dreambooth_ft -model=stable_diffusion_3 -bs_item=4 -fp_item=bf16 -run_mode=DP -device_num=N1C8 -max_iter=1000 -num_workers=0 - -# get data -bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh -# run -bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh deleted file mode 100644 index a8a1b5ea4..000000000 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -model_item=stable_diffusion_3-dreambooth_lora -model=stable_diffusion_3 -bs_item=4 -fp_item=bf16 -run_mode=DP -device_num=N1C8 -max_iter=1000 -num_workers=0 - -# get data -bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh -# run -bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; From 07482a90cfa94902b662ac2dadc43600905544a4 Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 16:28:09 +0800 Subject: [PATCH 07/11] trim --- .../stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh | 2 +- .../stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh | 2 +- .../stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh | 2 +- .../stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh | 2 +- .../stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh | 2 +- .../stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh | 2 +- .../stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh | 2 +- .../stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh | 2 +- .../dp/stable_diffusion_3/benchmark_common/prepare.sh | 10 +--------- .../benchmark_common/run_benchmark.sh | 8 -------- 10 files changed, 9 insertions(+), 25 deletions(-) diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh index 998f2edbc..2d2242d30 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh index 1692f8a5d..1fc7d081b 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh index e69f24dec..6d561ee48 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh index c54b80e7e..13d90c9ba 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh index 1cfe3ce76..fe454c0ef 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh index 94025ce08..ded063935 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh index 88eccc249..686428ad5 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh index 82576d762..ead2da890 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh index 800f78eba..6b6dbdf7c 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,14 +31,6 @@ else echo "Directory dog already exists. Skipping download." 
fi -# rm -rf stable-diffusion-3-medium-diffusers-paddle-init.tar.gz -# rm -rf stable-diffusion-3-medium-diffusers-paddle-init -# rm -rf dog.zip -# rm -rf dog -# wget https://bj.bcebos.com/paddlenlp/models/community/westfish/sd3_benchmark/stable-diffusion-3-medium-diffusers-paddle-init.tar.gz -# tar -zxvf stable-diffusion-3-medium-diffusers-paddle-init.tar.gz -# wget https://paddlenlp.bj.bcebos.com/models/community/westfish/develop-sdxl/dog.zip -# unzip dog.zip RUN_SETUP=${RUN_SETUP:-"true"} if [ "$RUN_SETUP" = "true" ]; then diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh index 6399a7d77..7d6e3c26b 100644 --- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh @@ -62,12 +62,6 @@ function _train(){ export FLAG_FUSED_LINEAR=0 export FLAGS_conv_workspace_size_limit=4096 - # # 是否开启 ema - # export FLAG_USE_EMA=0 - # # 是否开启 recompute - # export FLAG_RECOMPUTE=1 - # # 是否开启 xformers - # export FLAG_XFORMERS=1 export FLAGS_cudnn_deterministic=True env |grep FLAG @@ -80,8 +74,6 @@ function _train(){ ls; echo "------------" - # model_path=../ppdiffusers/examples/dreambooth/ - # cd ${model_path} if [ ${model_item} = "stable_diffusion_3-dreambooth_ft" ];then train_cmd=" From eefc34602ac743c346ae6741d264aa7b48d2d490 Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 16:46:00 +0800 Subject: [PATCH 08/11] add sd3 infer bench --- .../deploy/sd3/infer_dygraph_paddle.py | 273 ++++++++++++++ ppdiffusers/deploy/sd3/infer_dygraph_torch.py | 334 ++++++++++++++++++ .../deploy/sd3/scripts/benchmark_paddle.sh | 32 ++ .../deploy/sd3/scripts/benchmark_torch.sh | 26 ++ 4 files changed, 665 insertions(+) create mode 100644 ppdiffusers/deploy/sd3/infer_dygraph_paddle.py create mode 100644 ppdiffusers/deploy/sd3/infer_dygraph_torch.py create mode 
100644 ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh create mode 100644 ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py new file mode 100644 index 000000000..5db7de8b3 --- /dev/null +++ b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py @@ -0,0 +1,273 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import time +import warnings + +import cv2 +import numpy as np +import paddle +from PIL import Image +from tqdm.auto import trange + +from ppdiffusers import ( + FlowMatchEulerDiscreteScheduler, + DDIMScheduler, + DDPMScheduler, + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + KDPM2AncestralDiscreteScheduler, + KDPM2DiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusion3Pipeline, + UniPCMultistepScheduler, +) +from ppdiffusers.utils import load_image + + +def get_canny_image(image, args): + if isinstance(image, Image.Image): + image = np.array(image) + image = cv2.Canny(image, args.low_threshold, args.high_threshold) + image = image[:, :, None] + image = np.concatenate([image, image, image], axis=2) + canny_image = Image.fromarray(image) + return canny_image + + +def strtobool(v): + if isinstance(v, bool): + return v 
+ if v.lower() in ("yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise ValueError( + f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)." + ) + + +def change_scheduler(self, scheduler_type="ddim"): + self.orginal_scheduler_config = self.scheduler.config + scheduler_type = scheduler_type.lower() + if scheduler_type == "flow": + scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True) + elif scheduler_type == "pndm": + scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True) + elif scheduler_type == "lms": + scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "heun": + scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "euler": + scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "dpm-multi": + scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "dpm-single": + scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "kdpm2-ancestral": + scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "kdpm2": + scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "unipc-multi": + scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "ddim": + scheduler = DDIMScheduler.from_config( + self.orginal_scheduler_config, + steps_offset=1, + clip_sample=False, + set_alpha_to_one=False, + ) + elif scheduler_type == "ddpm": + scheduler 
= DDPMScheduler.from_config( + self.orginal_scheduler_config, + ) + elif scheduler_type == "deis-multi": + scheduler = DEISMultistepScheduler.from_config( + self.orginal_scheduler_config, + ) + else: + raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") + return scheduler + + +def parse_arguments(): + + parser = argparse.ArgumentParser() + parser.add_argument( + "--pretrained_model_name_or_path", + type=str, + default="stabilityai/stable-diffusion-3-medium-diffusers", + help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).", + ) + parser.add_argument( + "--inference_steps", + type=int, + default=50, + help="The number of unet inference steps.", + ) + parser.add_argument( + "--benchmark_steps", + type=int, + default=10, + help="The number of performance benchmark steps.", + ) + parser.add_argument( + "--task_name", + type=str, + default="all", + choices=[ + "text2img", + "img2img", + "inpaint_legacy", + "all", + ], + help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ", + ) + parser.add_argument( + "--parse_prompt_type", + type=str, + default="raw", + choices=[ + "raw", + "lpw", + ], + help="The parse_prompt_type can be one of [raw, lpw]. ", + ) + parser.add_argument("--use_fp16", type=strtobool, default=True, help="Wheter to use FP16 mode") + parser.add_argument( + "--attention_type", type=str, default="raw", choices=["raw", "cutlass", "flash", "all"], help="attention_type." + ) + parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. 
-1 means use cpu") + parser.add_argument( + "--scheduler", + type=str, + default="euler-ancestral", + choices=[ + "flow", + "pndm", + "lms", + "euler", + "euler-ancestral", + "dpm-multi", + "dpm-single", + "unipc-multi", + "ddim", + "ddpm", + "deis-multi", + "heun", + "kdpm2-ancestral", + "kdpm2", + ], + help="The scheduler type of stable diffusion.", + ) + parser.add_argument("--height", type=int, default=512, help="Height of input image") + parser.add_argument("--width", type=int, default=512, help="Width of input image") + parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint") + return parser.parse_args() + + +def main(args): + + seed = 1024 + paddle_dtype = paddle.float16 if args.use_fp16 else paddle.float32 + pipe = StableDiffusion3Pipeline.from_pretrained( + args.pretrained_model_name_or_path, + safety_checker=None, + feature_extractor=None, + requires_safety_checker=False, + paddle_dtype=paddle_dtype, + ) + scheduler = change_scheduler(pipe, args.scheduler) + pipe.scheduler = scheduler + + if args.attention_type == "all": + args.attention_type = ["raw", "cutlass", "flash"] + else: + args.attention_type = [args.attention_type] + + for attention_type in args.attention_type: + if attention_type == "raw": + pipe.disable_xformers_memory_efficient_attention() + else: + try: + pipe.enable_xformers_memory_efficient_attention(attention_type) + except Exception as e: + if attention_type == "flash": + warnings.warn( + "Attention type flash is not supported on your GPU! We need to use 3060、3070、3080、3090、4060、4070、4080、4090、A30、A100 etc." + ) + continue + else: + raise ValueError(e) + + if not args.use_fp16 and attention_type == "flash": + print("Flash attention is not supported dtype=float32! Please use float16 or bfloat16. 
We will skip this!") + continue + + width = args.width + height = args.height + pipe.set_progress_bar_config(disable=False) + + folder = f"paddle_attn_{attention_type}_fp16" if args.use_fp16 else f"paddle_attn_{attention_type}_fp32" + os.makedirs(folder, exist_ok=True) + if args.task_name in ["text2img", "all"]: + init_image = load_image( + "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png" + ) + # text2img + prompt = "bird" + time_costs = [] + # warmup + pipe( + prompt, + num_inference_steps=10, + height=height, + width=width, + ) + print("==> Test text2img performance.") + for step in trange(args.benchmark_steps): + start = time.time() + paddle.seed(seed) + images = pipe( + prompt, + num_inference_steps=args.inference_steps, + height=height, + width=width, + ).images + latency = time.time() - start + time_costs += [latency] + # print(f"No {step:3d} time cost: {latency:2f} s") + print( + f"Attention type: {attention_type}, " + f"Use fp16: {'true' if args.use_fp16 else 'false'}, " + f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, " + f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, " + f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s." + ) + images[0].save(f"{folder}/text2img.png") + + +if __name__ == "__main__": + args = parse_arguments() + main(args) diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py new file mode 100644 index 000000000..b16869458 --- /dev/null +++ b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py @@ -0,0 +1,334 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import time + +import torch + +torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention +delattr(torch.nn.functional, "scaled_dot_product_attention") + +import cv2 +import numpy as np +from diffusers import ( + FlowMatchEulerDiscreteScheduler, + DDIMScheduler, + DDPMScheduler, + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + KDPM2AncestralDiscreteScheduler, + KDPM2DiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusion3Pipeline, + UniPCMultistepScheduler, +) +from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0 +from diffusers.utils import load_image +from PIL import Image +from tqdm.auto import trange + + +def get_canny_image(image, args): + if isinstance(image, Image.Image): + image = np.array(image) + image = cv2.Canny(image, args.low_threshold, args.high_threshold) + image = image[:, :, None] + image = np.concatenate([image, image, image], axis=2) + canny_image = Image.fromarray(image) + return canny_image + + +def strtobool(v): + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise ValueError( + f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)." 
+ ) + + +def change_scheduler(self, scheduler_type="ddim"): + self.orginal_scheduler_config = self.scheduler.config + scheduler_type = scheduler_type.lower() + if scheduler_type == "flow": + scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True) + elif scheduler_type == "pndm": + scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True) + elif scheduler_type == "lms": + scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "heun": + scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "euler": + scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "dpm-multi": + scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "dpm-single": + scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "kdpm2-ancestral": + scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "kdpm2": + scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "unipc-multi": + scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "ddim": + scheduler = DDIMScheduler.from_config( + self.orginal_scheduler_config, + steps_offset=1, + clip_sample=False, + set_alpha_to_one=False, + ) + elif scheduler_type == "ddpm": + scheduler = DDPMScheduler.from_config( + self.orginal_scheduler_config, + ) + elif scheduler_type == "deis-multi": + scheduler = DEISMultistepScheduler.from_config( + self.orginal_scheduler_config, + ) + else: + raise ValueError(f"Scheduler of type {scheduler_type} doesn't 
exist!") + return scheduler + + +def parse_arguments(): + + parser = argparse.ArgumentParser() + parser.add_argument( + "--pretrained_model_name_or_path", + type=str, + default="stabilityai/stable-diffusion-3-medium-diffusers", + help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).", + ) + parser.add_argument( + "--inference_steps", + type=int, + default=50, + help="The number of unet inference steps.", + ) + parser.add_argument( + "--benchmark_steps", + type=int, + default=10, + help="The number of performance benchmark steps.", + ) + parser.add_argument( + "--task_name", + type=str, + default="all", + choices=[ + "text2img", + "img2img", + "inpaint_legacy", + "all", + ], + help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ", + ) + parser.add_argument( + "--parse_prompt_type", + type=str, + default="raw", + choices=[ + "raw", + "lpw", + ], + help="The parse_prompt_type can be one of [raw, lpw]. ", + ) + parser.add_argument( + "--channels_last", + type=strtobool, + default=False, + help="Wheter to use channels_last", + ) + parser.add_argument("--use_fp16", type=strtobool, default=True, help="Wheter to use FP16 mode") + parser.add_argument("--tf32", type=strtobool, default=True, help="tf32") + parser.add_argument("--compile", type=strtobool, default=False, help="compile") + parser.add_argument( + "--attention_type", + type=str, + default="sdp", + choices=[ + "raw", + "sdp", + ], + help="attention_type.", + ) + parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. 
-1 means use cpu") + parser.add_argument( + "--scheduler", + type=str, + default="euler-ancestral", + choices=[ + "flow", + "pndm", + "lms", + "euler", + "euler-ancestral", + "dpm-multi", + "dpm-single", + "unipc-multi", + "ddim", + "ddpm", + "deis-multi", + "heun", + "kdpm2-ancestral", + "kdpm2", + ], + help="The scheduler type of stable diffusion.", + ) + parser.add_argument("--height", type=int, default=512, help="Height of input image") + parser.add_argument("--width", type=int, default=512, help="Width of input image") + parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint") + return parser.parse_args() + + +def attn_processors(self): + processors = {} + + def fn_recursive_add_processors(name: str, module, processors): + if hasattr(module, "set_processor"): + processors[f"{name}.processor"] = module.processor + + for sub_name, child in module.named_children(): + fn_recursive_add_processors(f"{name}.{sub_name}", child, processors) + + return processors + + for name, module in self.named_children(): + fn_recursive_add_processors(name, module, processors) + + return processors + + +def set_attn_processor(self, processor): + count = len(attn_processors(self).keys()) + + if isinstance(processor, dict) and len(processor) != count: + raise ValueError( + f"A dict of processors was passed, but the number of processors {len(processor)} does not match the" + f" number of attention layers: {count}. Please make sure to pass {count} processor classes." 
+ ) + + def fn_recursive_attn_processor(name: str, module, processor): + if hasattr(module, "set_processor"): + if not isinstance(processor, dict): + module.set_processor(processor) + else: + module.set_processor(processor.pop(f"{name}.processor")) + + for sub_name, child in module.named_children(): + fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor) + + for name, module in self.named_children(): + fn_recursive_attn_processor(name, module, processor) + + +def main(args): + if args.tf32: + torch.backends.cuda.matmul.allow_tf32 = True + else: + torch.backends.cuda.matmul.allow_tf32 = False + + seed = 1024 + torch_dtype = torch.float16 if args.use_fp16 else torch.float32 + pipe = StableDiffusion3Pipeline.from_pretrained( + args.pretrained_model_name_or_path, + safety_checker=None, + feature_extractor=None, + requires_safety_checker=False, + torch_dtype=torch_dtype, + ) + scheduler = change_scheduler(pipe, args.scheduler) + pipe.scheduler = scheduler + if args.device_id >= 0: + pipe.to(f"cuda:{args.device_id}") + + if args.attention_type == "all": + args.attention_type = ["raw", "sdp"] + else: + args.attention_type = [args.attention_type] + + for attention_type in args.attention_type: + attn_prrocessor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0 + if attention_type == "sdp": + torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_ + set_attn_processor(pipe.unet, attn_prrocessor_cls()) + set_attn_processor(pipe.vae, attn_prrocessor_cls()) + + if args.channels_last: + pipe.unet.to(memory_format=torch.channels_last) + + if args.compile: + print("Run torch compile") + pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) + + width = args.width + height = args.height + pipe.set_progress_bar_config(disable=False) + + folder = f"torch_attn_{attention_type}_fp16" if args.use_fp16 else f"torch_attn_{attention_type}_fp32" + os.makedirs(folder, exist_ok=True) + if 
args.task_name in ["text2img", "all"]: + init_image = load_image( + "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png" + ) + # text2img + prompt = "bird" + time_costs = [] + # warmup + pipe( + prompt, + num_inference_steps=10, + height=height, + width=width, + ) + print("==> Test text2img performance.") + for step in trange(args.benchmark_steps): + start = time.time() + torch.cuda.manual_seed(seed) + images = pipe( + prompt, + num_inference_steps=args.inference_steps, + height=height, + width=width, + ).images + latency = time.time() - start + time_costs += [latency] + # print(f"No {step:3d} time cost: {latency:2f} s") + print( + f"Attention type: {attention_type}, " + f"Use fp16: {'true' if args.use_fp16 else 'false'}, " + f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, " + f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, " + f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s." + ) + images[0].save(f"{folder}/text2img.png") + + + +if __name__ == "__main__": + args = parse_arguments() + main(args) diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh new file mode 100644 index 000000000..a0c2d8d45 --- /dev/null +++ b/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh @@ -0,0 +1,32 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# attention raw fp16 +python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 + +# attention cutlass fp16 +python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type cutlass --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 + +# attention flash fp16 +python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type flash --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 + + +# attention raw fp32 +python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 + +# attention cutlass fp32 +python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type cutlass --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 + +# attention flash fp32 +python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type flash --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh new file mode 100644 index 000000000..9ef75119d --- /dev/null +++ b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh @@ -0,0 +1,26 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# attention raw +python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 + +# attention sdp +python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 + + +# attention raw fp32 +python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 + +# attention sdp fp32 +python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 \ No newline at end of file From 684db2ed89775b0d9c504c63e06ac29e0b9213bd Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 17:21:11 +0800 Subject: [PATCH 09/11] update --- ppdiffusers/deploy/sd3/infer_dygraph_torch.py | 28 +++++++++---------- .../deploy/sd3/scripts/benchmark_torch.sh | 11 ++------ 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py index b16869458..621e87f27 100644 --- a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py +++ b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py @@ -18,8 +18,8 @@ import torch -torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention -delattr(torch.nn.functional, "scaled_dot_product_attention") 
+# torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention +# delattr(torch.nn.functional, "scaled_dot_product_attention") import cv2 import numpy as np @@ -272,18 +272,18 @@ def main(args): args.attention_type = [args.attention_type] for attention_type in args.attention_type: - attn_prrocessor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0 - if attention_type == "sdp": - torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_ - set_attn_processor(pipe.unet, attn_prrocessor_cls()) - set_attn_processor(pipe.vae, attn_prrocessor_cls()) - - if args.channels_last: - pipe.unet.to(memory_format=torch.channels_last) - - if args.compile: - print("Run torch compile") - pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) + # attn_prrocessor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0 + # if attention_type == "sdp": + # torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_ + # set_attn_processor(pipe.transformer, attn_prrocessor_cls()) + # set_attn_processor(pipe.vae, attn_prrocessor_cls()) + + # if args.channels_last: + # pipe.transformer.to(memory_format=torch.channels_last) + + # if args.compile: + # print("Run torch compile") + # pipe.unet = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True) width = args.width height = args.height diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh index 9ef75119d..24ca54337 100644 --- a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh +++ b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh @@ -12,15 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# attention raw -python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 +# sd3 do ot supprot attention raw # attention sdp -python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 - - -# attention raw fp32 -python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 +python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 --pretrained_model_name_or_path /root/paddlejob/workspace/env_run/output/zhangxu/benchmark/frame_benchmark/pytorch/dynamic/PaddleMIX/models/diffusers/stable-diffusion-3-medium-diffusers # attention sdp fp32 -python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 \ No newline at end of file +python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 --pretrained_model_name_or_path /root/paddlejob/workspace/env_run/output/zhangxu/benchmark/frame_benchmark/pytorch/dynamic/PaddleMIX/models/diffusers/stable-diffusion-3-medium-diffusers \ No newline at end of file From e28c3be02bd0c0f28316b812d95512cda8b64dbb Mon Sep 17 00:00:00 2001 From: westfish Date: Fri, 15 Nov 2024 17:26:53 +0800 Subject: [PATCH 10/11] update --- ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh index 24ca54337..020c54969 100644 --- 
a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh +++ b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh @@ -15,7 +15,7 @@ # sd3 do ot supprot attention raw # attention sdp -python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 --pretrained_model_name_or_path /root/paddlejob/workspace/env_run/output/zhangxu/benchmark/frame_benchmark/pytorch/dynamic/PaddleMIX/models/diffusers/stable-diffusion-3-medium-diffusers +python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 # attention sdp fp32 -python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 --pretrained_model_name_or_path /root/paddlejob/workspace/env_run/output/zhangxu/benchmark/frame_benchmark/pytorch/dynamic/PaddleMIX/models/diffusers/stable-diffusion-3-medium-diffusers \ No newline at end of file +python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 \ No newline at end of file From a7b824c2ad1f437d59ec9c6c76cbd8d0b7051933 Mon Sep 17 00:00:00 2001 From: westfish Date: Wed, 27 Nov 2024 03:53:01 +0000 Subject: [PATCH 11/11] update --- ppdiffusers/deploy/sd3/infer_dygraph_paddle.py | 9 --------- ppdiffusers/deploy/sd3/infer_dygraph_torch.py | 9 --------- 2 files changed, 18 deletions(-) diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py index 5db7de8b3..14d1f5f24 100644 --- a/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py +++ b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py @@ -43,15 +43,6 @@ from ppdiffusers.utils import load_image -def get_canny_image(image, args): - if isinstance(image, 
Image.Image): - image = np.array(image) - image = cv2.Canny(image, args.low_threshold, args.high_threshold) - image = image[:, :, None] - image = np.concatenate([image, image, image], axis=2) - canny_image = Image.fromarray(image) - return canny_image - def strtobool(v): if isinstance(v, bool): diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py index 621e87f27..14c547b56 100644 --- a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py +++ b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py @@ -46,15 +46,6 @@ from tqdm.auto import trange -def get_canny_image(image, args): - if isinstance(image, Image.Image): - image = np.array(image) - image = cv2.Canny(image, args.low_threshold, args.high_threshold) - image = image[:, :, None] - image = np.concatenate([image, image, image], axis=2) - canny_image = Image.fromarray(image) - return canny_image - def strtobool(v): if isinstance(v, bool):