From 20c006c35475c564e5c0803f64da0e7a1bac8f84 Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 1 Nov 2024 04:16:55 +0000
Subject: [PATCH 01/11] update sd readme

---
 ppdiffusers/examples/stable_diffusion/README.md        | 2 +-
 ppdiffusers/examples/stable_diffusion/requirements.txt | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/ppdiffusers/examples/stable_diffusion/README.md b/ppdiffusers/examples/stable_diffusion/README.md
index 4bde9bd0b..f95b580ad 100644
--- a/ppdiffusers/examples/stable_diffusion/README.md
+++ b/ppdiffusers/examples/stable_diffusion/README.md
@@ -41,7 +41,7 @@ cd PaddleMIX/ppdiffusers/examples/stable_diffusion
 pip install -r requirements.txt
 ```
 
-> 注：本模型训练与推理需要依赖 CUDA 11.2 及以上版本，如果本地机器不符合要求，建议前往 [AI Studio](https://aistudio.baidu.com/index) 进行模型训练、推理任务。
+> 注：本模型训练与推理需要依赖 CUDA 11.2 及以上版本，如果本地机器不符合要求，建议前往 [AI Studio](https://aistudio.baidu.com/index) 进行模型训练、推理任务。推荐使用Linux系统，Windows系统未经过系统测试。
 
 ## 3. 数据准备
 
diff --git a/ppdiffusers/examples/stable_diffusion/requirements.txt b/ppdiffusers/examples/stable_diffusion/requirements.txt
index 06a6c755c..c3f48edc2 100644
--- a/ppdiffusers/examples/stable_diffusion/requirements.txt
+++ b/ppdiffusers/examples/stable_diffusion/requirements.txt
@@ -1,6 +1,3 @@
-paddlenlp>=2.6.1
-ppdiffusers>=0.19.3
 fastcore
 visualdl
-Pillow
 safetensors
\ No newline at end of file

From 4d50051b031674088838cc3130a943d5f98b4646 Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 11:41:48 +0800
Subject: [PATCH 02/11] add test_tipc/dygraph/dp/stable_diffusion_3

---
 ...e_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh |  27 +++
 ...e_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh |  27 +++
 ...e_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh |  27 +++
 ...diffusion_3-dreambooth_lora_bs1_fp16_DP.sh |  27 +++
 ...diffusion_3-dreambooth_lora_bs4_bf16_DP.sh |  27 +++
 ...diffusion_3-dreambooth_lora_bs4_fp16_DP.sh |  27 +++
 ...e_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh |  27 +++
 ...e_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh |  27 +++
 ...e_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh |  27 +++
 ...diffusion_3-dreambooth_lora_bs1_fp16_DP.sh |  27 +++
 ...diffusion_3-dreambooth_lora_bs4_bf16_DP.sh |  27 +++
 ...diffusion_3-dreambooth_lora_bs4_fp16_DP.sh |  27 +++
 .../benchmark_common/analysis_log.py          | 155 ++++++++++++++
 .../benchmark_common/prepare.sh               |  66 ++++++
 .../benchmark_common/run_benchmark.sh         | 196 ++++++++++++++++++
 15 files changed, 741 insertions(+)
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
 create mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh

diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
new file mode 100644
index 000000000..998f2edbc
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=1
+fp_item=fp16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
new file mode 100644
index 000000000..f2a24ab7e
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=4
+fp_item=bf16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
new file mode 100644
index 000000000..1692f8a5d
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=4
+fp_item=fp16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
new file mode 100644
index 000000000..e69f24dec
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=1
+fp_item=fp16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
new file mode 100644
index 000000000..4fb089122
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=4
+fp_item=bf16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
new file mode 100644
index 000000000..c54b80e7e
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=4
+fp_item=fp16
+run_mode=DP
+device_num=N1C1
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
new file mode 100644
index 000000000..1cfe3ce76
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=1
+fp_item=fp16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
new file mode 100644
index 000000000..3c68cfbbe
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=4
+fp_item=bf16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
new file mode 100644
index 000000000..94025ce08
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_ft
+model=stable_diffusion_3
+bs_item=4
+fp_item=fp16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
new file mode 100644
index 000000000..88eccc249
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=1
+fp_item=fp16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
new file mode 100644
index 000000000..a8a1b5ea4
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=4
+fp_item=bf16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
new file mode 100644
index 000000000..82576d762
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=stable_diffusion_3-dreambooth_lora
+model=stable_diffusion_3
+bs_item=4
+fp_item=fp16
+run_mode=DP
+device_num=N1C8
+max_iter=1000
+num_workers=0
+
+# Download the benchmark assets and, when RUN_SETUP is "true" (the default), install the dependencies.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
+# Run the benchmark with stderr merged into stdout; the arguments are read positionally by _set_params.
+bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py
new file mode 100644
index 000000000..7606e8adc
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+import json
+import os
+import re
+import sys
+from pdb import line_prefix
+
+import numpy as np
+from numpy import mean, var
+
+class TimeAnalyzer(object):
+    """Extract throughput and loss figures from a tqdm-style training log.
+
+    Only lines containing ``keyword`` are parsed; ``loss_keyword`` is stored
+    for interface compatibility but is not consulted while parsing.
+    """
+
+    # tqdm prints "<x>it/s" for rates of at least 1 it/s and "<x>s/it" below that.
+    _RATE_RE = re.compile(r"(\d+\.\d+)(it/s|s/it)")
+    # Accept integer, decimal and scientific notation, e.g. "loss=1.5e-05".
+    _LOSS_RE = re.compile(r"loss=([-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)")
+    _WARMUP_SAMPLES = 4  # leading throughput samples excluded from the mean
+    _EWMA_ALPHA = 0.9  # weight of the newest loss value when smoothing
+
+    def __init__(self, filename, keyword=None, loss_keyword=None):
+        if filename is None:
+            raise Exception("Please specify the filename!")
+
+        if keyword is None:
+            raise Exception("Please specify the keyword!")
+
+        self.filename = filename
+        self.keyword = keyword
+        self.loss_keyword = loss_keyword
+
+    @staticmethod
+    def _ewma(data, alpha):
+        """Return the last exponentially weighted moving average of ``data``."""
+        smoothed = data[0]
+        for value in data[1:]:
+            smoothed = alpha * value + (1 - alpha) * smoothed
+        return smoothed
+
+    def get_ips(self):
+        """Parse the log and summarise speed and loss.
+
+        Returns:
+            tuple: ``(ips, loss_value, smoothed_loss)``. ``ips`` is the mean
+            iterations per second after dropping the warm-up samples (NaN if
+            none remain); ``loss_value`` is the last parsed loss and
+            ``smoothed_loss`` its EWMA, both ``-1`` when no loss was logged.
+        """
+        ips_list = []
+        loss_list = []
+        # Progress bars may contain non-ASCII glyphs; never fail on decoding.
+        with open(self.filename, "r", encoding="utf-8", errors="replace") as f_object:
+            for line in f_object.read().splitlines():
+                if self.keyword not in line:
+                    continue
+
+                rate_match = self._RATE_RE.search(line)
+                if rate_match:
+                    rate = float(rate_match.group(1))
+                    if rate_match.group(2) == "s/it":
+                        # Seconds per iteration: invert to iterations per second.
+                        rate = 1.0 / rate if rate else 0.0
+                    ips_list.append(rate)
+
+                loss_match = self._LOSS_RE.search(line)
+                if loss_match:
+                    loss_list.append(float(loss_match.group(1)))
+
+        # numpy's mean of an empty slice is NaN (with a RuntimeWarning).
+        ips = mean(ips_list[self._WARMUP_SAMPLES:])
+        if not loss_list:
+            # No loss was logged: return the sentinel instead of raising IndexError.
+            return ips, -1, -1
+        return ips, loss_list[-1], self._ewma(loss_list, self._EWMA_ALPHA)
+
+
+def analyze(model_item, log_file, res_log_file, device_num, bs, fp_item):
+    """Summarise a training log into the benchmark result JSON.
+
+    Args:
+        model_item: Model identifier used as the prefix of ``model_name``.
+        log_file: Path of the training log to parse.
+        res_log_file: Path the JSON summary is written to (overwritten).
+        device_num: Device layout such as ``N1C8``; its last number is used as the card count.
+        bs: Batch size; its last number scales iterations/sec into samples/sec.
+        fp_item: Precision tag, e.g. ``fp16`` or ``bf16``.
+    """
+    analyzer = TimeAnalyzer(log_file, "Steps:", None)
+    ips, convergence_value, smoothed_value = analyzer.get_ips()
+    ips = round(ips, 3)
+    ngpus = int(re.findall(r"\d+", device_num)[-1])
+    batch_size = int(re.findall(r"\d+", str(bs))[-1])
+    print("----ips: ", ips, "ngpus", ngpus, "batch_size", batch_size)
+    # Scale the parsed iterations/sec by batch size and card count.
+    ips *= batch_size
+    ips *= ngpus
+    run_mode = "DP"
+
+    model_name = model_item + "_" + "bs" + str(bs) + "_" + fp_item + "_" + run_mode
+    info = {
+        "model_branch": os.getenv("model_branch"),
+        "model_commit": os.getenv("model_commit"),
+        "model_name": model_name,
+        "batch_size": bs,
+        "fp_item": fp_item,
+        "run_mode": run_mode,
+        "convergence_value": convergence_value,
+        "smoothed_value": smoothed_value,
+        "convergence_key": "",
+        "ips": ips,
+        "speed_unit": "sample/sec",
+        "device_num": device_num,
+        "model_run_time": os.getenv("model_run_time"),
+        "frame_commit": "",
+        "frame_version": os.getenv("frame_version"),
+    }
+    json_info = json.dumps(info)
+    print(json_info)
+    with open(res_log_file, "w") as of:
+        of.write(json_info)
+
+
+if __name__ == "__main__":
+    # Exactly six positional arguments are required after the script name.
+    if len(sys.argv) != 7:
+        print(
+            "Usage:" + sys.argv[0]
+            + " model_item path/to/log/file path/to/res/log/file device_num bs fp_item"
+        )
+        sys.exit(1)  # non-zero status so callers can detect the usage error
+
+    model_item, log_file, res_log_file, device_num = sys.argv[1:5]
+    bs = int(sys.argv[5])
+    fp_item = sys.argv[6]
+
+    analyze(model_item, log_file, res_log_file, device_num, bs, fp_item)
\ No newline at end of file
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
new file mode 100644
index 000000000..1df6ab524
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
@@ -0,0 +1,66 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Download and extract the stable-diffusion-3-medium-diffusers-paddle-init archive unless its directory exists.
+if [ ! -d "stable-diffusion-3-medium-diffusers-paddle-init" ]; then
+    echo "Downloading stable-diffusion-3-medium-diffusers-paddle-init.tar.gz..."
+    wget https://bj.bcebos.com/paddlenlp/models/community/westfish/sd3_benchmark/stable-diffusion-3-medium-diffusers-paddle-init.tar.gz
+    echo "Extracting stable-diffusion-3-medium-diffusers-paddle-init.tar.gz..."
+    tar -zxvf stable-diffusion-3-medium-diffusers-paddle-init.tar.gz
+else
+    echo "Directory stable-diffusion-3-medium-diffusers-paddle-init already exists. Skipping download."
+fi
+
+# Download and unpack dog.zip unless the dog/ directory already exists.
+if [ ! -d "dog" ]; then
+    echo "Downloading dog.zip..."
+    wget https://paddlenlp.bj.bcebos.com/models/community/westfish/develop-sdxl/dog.zip
+    echo "Unzipping dog.zip..."
+    unzip dog.zip
+else
+    echo "Directory dog already exists. Skipping download."
+fi
+
+# Both checks only look for the extracted directories in the current working
+# directory. To force a fresh download, remove the archives and directories first:
+#   rm -rf stable-diffusion-3-medium-diffusers-paddle-init.tar.gz
+#   rm -rf stable-diffusion-3-medium-diffusers-paddle-init
+#   rm -rf dog.zip dog
+# and then run this script again.
+
+# Install dependencies unless the caller opts out by setting RUN_SETUP to anything but "true".
+RUN_SETUP=${RUN_SETUP:-"true"}
+if [ "$RUN_SETUP" = "true" ]; then
+    echo "Running setup and installation steps..."
+
+    # Put the parent of the current directory on PYTHONPATH for the commands in this script.
+    export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
+    python -m pip install --upgrade pip -i https://mirror.baidu.com/pypi/simple
+    python -m pip install einops -i https://mirror.baidu.com/pypi/simple
+    python -m pip install -r ../requirements.txt
+    python -m pip install --upgrade pybind11 regex sentencepiece tqdm visualdl attrdict easydict pyyaml paddlesde -i https://mirror.baidu.com/pypi/simple
+    python -m pip install paddlenlp==3.0.0b2
+    python -m pip install huggingface-hub==0.23.0
+
+    # uninstall ppdiffusers and install develop paddlemix
+    python -m pip uninstall -y ppdiffusers
+    # Subshells leave the working directory untouched and skip the install if cd fails.
+    (cd ../ppdiffusers/ && python -m pip install -e .)
+    # Use "python -m pip" here too so the same interpreter receives the packages.
+    (cd ../ppdiffusers/examples/dreambooth && python -m pip install -r requirements_sd3.txt)
+    python -m pip list
+else
+    echo "fast mode, skipping setup and installation steps as RUN_SETUP is set to false."
+fi
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
new file mode 100644
index 000000000..337ec9082
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
@@ -0,0 +1,196 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test training benchmark for a model.
+# Usage: bash test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} [${max_iter} [${num_workers}]]
+function _set_params(){   # Read positional args and environment variables into the globals used by _train and _analysis_log
+    model_item=${1:-"stable_diffusion_3-dreambooth_ft"}   # (required) model item; "stable_diffusion_3-dreambooth_ft" selects the full fine-tune script in _train, any other value selects the LoRA script
+    base_batch_size=${2:-"1"}       # (required) for static-graph single-process runs this is the per-card batch size and must be multiplied by the card count at training time
+    fp_item=${3:-"fp32"}            # (required) fp32|fp16|bf16
+    run_mode=${4:-"DP"}             # (required) MP model parallel | DP data parallel | PP pipeline parallel | hybrid DP1-MP1-PP1 | DP1-MP4-PP1
+    device_num=${5:-"N1C1"}         # (required) number of cards used: N1C1 | N1C8 | N4C32 (4 nodes, 32 cards)
+    profiling=${PROFILING:-"false"}      # (required) profiling switch, off by default, passed in through the PROFILING environment variable
+
+    model_repo="PaddleMIX"          # (required) name of the model suite
+    speed_unit="sample/sec"         # (required) unit of the speed metric
+    skip_steps=0                  # (required) log parsing: number of initial, performance-unstable steps to skip
+    keyword="ips:"                 # (required) log parsing: keyword that selects the lines carrying performance data
+    convergence_key="loss:"        # (optional) log parsing: keyword that selects the lines carrying convergence data, e.g. convergence_key="loss:"
+    max_iter=${6:-"20"}                 # (optional) keep the model run within 5 minutes; if code changes are needed to stop early, submit a PR to the suite; may instead be max_epoch
+    num_workers=${7:-"5"}                # (optional)
+    is_large_model=False           # (optional) False for ordinary models; set to True for a large model when only one ips record is taken
+
+    # Generic setup below; normally no changes are needed
+    model_name=${model_item}_bs${base_batch_size}_${fp_item}_${run_mode}  # (required) do not change this format; it is aligned with the competitor naming
+    device=${CUDA_VISIBLE_DEVICES//,/ }
+    arr=(${device})
+    num_gpu_devices=${#arr[*]}
+    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # (required) TRAIN_LOG_DIR is set as a global variable by the benchmark framework
+    profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)}  # (required) PROFILING_LOG_DIR is set as a global variable by the benchmark framework
+    speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
+
+    train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
+    profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
+    speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
+}
+
+function _train(){   # Build the training command for ${model_item}/${run_mode}, run it under a timeout, and collect its log into ${log_file}
+    batch_size=${base_batch_size}  # if the model runs multi-card in a single process, compute the multi-card batch size here in _train
+    echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
+    if [ ${profiling} = "true" ];then   # picks the log destination; NOTE(review): add_options is set in both branches but never read later in this function
+            add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
+            log_file=${profiling_log_file}
+        else
+            add_options=""
+            log_file=${train_log_file}
+    fi
+
+    # native dynamic graph
+    export FLAG_FUSED_LINEAR=0
+    export FLAGS_conv_workspace_size_limit=4096
+
+    # # 是否开启 ema
+    # export FLAG_USE_EMA=0
+    # # 是否开启 recompute
+    # export FLAG_RECOMPUTE=1
+    # # 是否开启 xformers
+    # export FLAG_XFORMERS=1
+    export FLAGS_cudnn_deterministic=True
+    env |grep FLAG
+
+    if [ ${fp_item} = "fp32" ]; then
+        fp_item_cmd="no"   # fp32 is passed to the script as --mixed_precision=no
+    else
+        fp_item_cmd=${fp_item}
+    fi
+    echo "------------"
+    ls;
+    echo "------------"
+
+    # model_path=../ppdiffusers/examples/dreambooth/
+    # cd ${model_path}
+
+    if [ ${model_item} = "stable_diffusion_3-dreambooth_ft" ];then
+        train_cmd="
+            ../ppdiffusers/examples/dreambooth/train_dreambooth_sd3.py \
+            --pretrained_model_name_or_path=stable-diffusion-3-medium-diffusers-paddle-init  \
+            --instance_data_dir=dog \
+            --output_dir=trained-sd3 \
+            --mixed_precision=${fp_item_cmd} \
+            --instance_prompt=a-photo-of-sks-dog \
+            --resolution=512 \
+            --train_batch_size=${batch_size} \
+            --gradient_accumulation_steps=4 \
+            --learning_rate=5e-5 \
+            --report_to=tensorboard \
+            --lr_scheduler=constant \
+            --lr_warmup_steps=0 \
+            --max_train_steps=${max_iter} \
+            --validation_prompt=A-photo-of-sks-dog-in-a-bucket \
+            --validation_epochs=20 \
+            --num_validation_images 1 \
+            --seed=0 \
+            --checkpointing_steps=10000
+        "
+    else
+        export USE_PEFT_BACKEND=True
+        train_cmd="
+            ../ppdiffusers/examples/dreambooth/train_dreambooth_lora_sd3.py \
+            --pretrained_model_name_or_path=stable-diffusion-3-medium-diffusers-paddle-init  \
+            --instance_data_dir=dog \
+            --output_dir=trained-sd3-lora \
+            --mixed_precision=${fp_item_cmd} \
+            --instance_prompt=a-photo-of-sks-dog \
+            --resolution=512 \
+            --train_batch_size=${batch_size} \
+            --gradient_accumulation_steps=4 \
+            --learning_rate=5e-5 \
+            --report_to=tensorboard \
+            --lr_scheduler=constant \
+            --lr_warmup_steps=0 \
+            --max_train_steps=${max_iter} \
+            --validation_prompt=A-photo-of-sks-dog-in-a-bucket \
+            --validation_epochs=20 \
+            --num_validation_images 1 \
+            --seed=0 \
+            --checkpointing_steps=10000
+        "
+    fi 
+
+    # Generic launch logic below; normally no changes are needed
+    case ${run_mode} in
+    DP) if [[ ${device_num} = "N1C1" ]];then
+            echo "run ${run_mode} "
+            train_cmd="python -u ${train_cmd}"
+        else
+            rm -rf ./mylog   # remove the launcher log directory before starting
+            train_cmd="python -u -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES \
+                  ${train_cmd}"
+        fi
+        ;;
+    DP1-MP1-PP1)  echo "run run_mode: DP1-MP1-PP1" ;;
+    *) echo "choose run_mode "; exit 1;
+    esac
+
+    echo "train_cmd: ${train_cmd}  log_file: ${log_file}"
+    RUN_SETUP=${RUN_SETUP:-"true"}
+    if [ "$RUN_SETUP" = "true" ]; then
+        timeout 30m ${train_cmd} > ${log_file} 2>&1
+    else
+        echo "fast mode, only run 3m"
+        timeout 3m ${train_cmd} > ${log_file} 2>&1
+    fi
+    # eval ${train_cmd}
+    # eval "timeout 30m ${train_cmd} > ${log_file} 2>&1"
+    if [ $? -ne 0 ];then   # $? is still the status of the timeout command run in the branch above
+        echo -e "${model_name}, FAIL"
+    else
+        echo -e "${model_name}, SUCCESS"
+    fi
+    # kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
+
+    if [ ${device_num} != "N1C1" -a -d mylog ]; then   # multi-device run: replace the captured log with worker 0's log from ./mylog
+        rm ${log_file}
+        cp mylog/workerlog.0 ${log_file}
+    fi
+    echo ${train_cmd} >> ${log_file}
+    cat ${log_file}
+}
+
+function _analysis_log(){   # Run analysis_log.py with the model item, log file, speed-log file, device count, batch size and precision
+    # cd -
+    analysis_log_cmd="python test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py \
+        ${model_item} ${log_file} ${speed_log_file} ${device_num} ${base_batch_size} ${fp_item}"
+    echo ${analysis_log_cmd}   # print the command before running it
+    eval ${analysis_log_cmd}   # script path is relative, so the working directory must contain test_tipc/
+}
+
+_set_params "$@"   # quoted so every argument is forwarded exactly as received
+str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
+export frame_version=${str_tmp%%.post*}   # drop any ".post..." suffix from the installed version string
+export frame_commit=$(echo `python -c "import paddle;print(paddle.version.commit)"`)
+export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3-`   # "3-" keeps branch names that themselves contain "/"
+export model_commit=$(git log|head -n1|awk '{print $2}')
+echo "---------frame_version is ${frame_version}"
+echo "---------Paddle commit is ${frame_commit}"
+echo "---------Model commit is ${model_commit}"
+echo "---------model_branch is ${model_branch}"
+
+job_bt=$(date '+%s')   # epoch seconds; YYYYmmddHHMMSS stamps cannot be subtracted across minute/hour/day boundaries
+_train
+job_et=$(date '+%s')
+export model_run_time=$((${job_et}-${job_bt}))   # elapsed wall-clock seconds spent in _train
+_analysis_log

From 4f080451773b8468dafa280ba0de9986dc12015d Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 14:20:39 +0800
Subject: [PATCH 03/11] update

---
 ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py   | 2 +-
 ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py b/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py
index 907a92776..80debd760 100644
--- a/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py
+++ b/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py
@@ -165,7 +165,7 @@ def reset_lora_parameters(self, adapter_name, init_lora_weights):
             else:
                 raise ValueError(f"Unknown initialization {init_lora_weights=}")
             nn.init.zeros_(self.lora_B[adapter_name].weight)
-        if adapter_name in self.lora_embedding_A.keys():
+        if adapter_name in dict(self.lora_embedding_A).keys():
             # initialize a the same way as the default for nn.linear and b to zero
             nn.init.zeros_(self.lora_embedding_A[adapter_name])
             nn.init.normal_(self.lora_embedding_B[adapter_name])
diff --git a/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py b/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py
index 66467f94c..5f643a017 100644
--- a/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py
+++ b/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py
@@ -416,6 +416,8 @@ def set_adapter(self, adapter_names: str | list[str]) -> None:
         # Deactivate grads on the inactive adapter and activate grads on the active adapter
         for layer_name in self.adapter_layer_names:
             module_dict = getattr(self, layer_name)
+            if isinstance(module_dict, paddle.nn.ParameterDict):
+                module_dict = dict(module_dict)
             for key, layer in module_dict.items():
                 if key in adapter_names:
                     # Note: It is possible that not a single layer is called with requires_grad_(True) here. This may

From cc24c8ff8f82cd82ca46d14131f9c59a520de628 Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 14:23:32 +0800
Subject: [PATCH 04/11] update

---
 .../dp/stable_diffusion_3/benchmark_common/prepare.sh     | 7 ++++---
 .../stable_diffusion_3/benchmark_common/run_benchmark.sh  | 8 ++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
index 1df6ab524..800f78eba 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
@@ -45,10 +45,11 @@ if [ "$RUN_SETUP" = "true" ]; then
     echo "Running setup and installation steps..."
 
     export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
-    python -m pip install --upgrade pip -i https://mirror.baidu.com/pypi/simple
-    python -m pip install einops -i https://mirror.baidu.com/pypi/simple
+    python -m pip install --upgrade pip
+    # python -m pip install paddlepaddle-gpu==3.0.0b2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
+    python -m pip install einops
     python -m pip install -r ../requirements.txt
-    python -m pip install --upgrade pybind11 regex sentencepiece tqdm visualdl attrdict easydict pyyaml paddlesde -i https://mirror.baidu.com/pypi/simple
+    python -m pip install --upgrade pybind11 regex sentencepiece tqdm visualdl attrdict easydict pyyaml paddlesde
     python -m pip install paddlenlp==3.0.0b2
     python -m pip install huggingface-hub==0.23.0
 
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
index 337ec9082..6399a7d77 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
@@ -100,7 +100,7 @@ function _train(){
             --lr_warmup_steps=0 \
             --max_train_steps=${max_iter} \
             --validation_prompt=A-photo-of-sks-dog-in-a-bucket \
-            --validation_epochs=20 \
+            --validation_epochs=100 \
             --num_validation_images 1 \
             --seed=0 \
             --checkpointing_steps=10000
@@ -123,7 +123,7 @@ function _train(){
             --lr_warmup_steps=0 \
             --max_train_steps=${max_iter} \
             --validation_prompt=A-photo-of-sks-dog-in-a-bucket \
-            --validation_epochs=20 \
+            --validation_epochs=100 \
             --num_validation_images 1 \
             --seed=0 \
             --checkpointing_steps=10000
@@ -146,8 +146,8 @@ function _train(){
     esac
 
     echo "train_cmd: ${train_cmd}  log_file: ${log_file}"
-    RUN_SETUP=${RUN_SETUP:-"true"}
-    if [ "$RUN_SETUP" = "true" ]; then
+    RUN_SLOW=${RUN_SLOW:-"true"}
+    if [ "$RUN_SLOW" = "true" ]; then
         timeout 30m ${train_cmd} > ${log_file} 2>&1
     else
         echo "fast mode, only run 3m"

From 8e37d7fcab214db008def01ab7becd4614004e67 Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 15:05:57 +0800
Subject: [PATCH 05/11] update

---
 ppdiffusers/ppdiffusers/loaders/deprecate.py          | 4 ++--
 ppdiffusers/ppdiffusers/models/attention_processor.py | 8 ++++----
 ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py   | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ppdiffusers/ppdiffusers/loaders/deprecate.py b/ppdiffusers/ppdiffusers/loaders/deprecate.py
index 2b88f5aeb..e1e72424b 100644
--- a/ppdiffusers/ppdiffusers/loaders/deprecate.py
+++ b/ppdiffusers/ppdiffusers/loaders/deprecate.py
@@ -19,7 +19,7 @@
 def text_encoder_lora_state_dict(text_encoder):
     deprecate(
         "text_encoder_load_state_dict in `models`",
-        "0.27.0",
+        "0.45.0",
         "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.",
     )
     state_dict = {}
@@ -45,7 +45,7 @@ def text_encoder_lora_state_dict(text_encoder):
     def text_encoder_attn_modules(text_encoder):
         deprecate(
             "text_encoder_attn_modules in `models`",
-            "0.27.0",
+            "0.45.0",
             "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.",
         )
         from ppdiffusers.transformers import CLIPTextModel, CLIPTextModelWithProjection
diff --git a/ppdiffusers/ppdiffusers/models/attention_processor.py b/ppdiffusers/ppdiffusers/models/attention_processor.py
index c93c55ae6..d6d878461 100644
--- a/ppdiffusers/ppdiffusers/models/attention_processor.py
+++ b/ppdiffusers/ppdiffusers/models/attention_processor.py
@@ -362,7 +362,7 @@ def set_processor(self, processor: "AttnProcessor", _remove_lora: bool = False)
         if not USE_PEFT_BACKEND and hasattr(self, "processor") and _remove_lora and self.to_q.lora_layer is not None:
             deprecate(
                 "set_processor to offload LoRA",
-                "0.26.0",
+                "0.45.0",
                 "In detail, removing LoRA layers via calling `set_default_attn_processor` is deprecated. Please make sure to call `pipe.unload_lora_weights()` instead.",
             )
             # TODO(Patrick, Sayak) - this can be deprecated once PEFT LoRA integration is complete
@@ -1635,7 +1635,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg
         self_cls_name = self.__class__.__name__
         deprecate(
             self_cls_name,
-            "0.26.0",
+            "0.45.0",
             (
                 f"Make sure use {self_cls_name[4:]} instead by setting"
                 "LoRA layers to `self.{to_q,to_k,to_v,to_out[0]}.lora_layer` respectively. This will be done automatically when using"
@@ -1714,7 +1714,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg
         self_cls_name = self.__class__.__name__
         deprecate(
             self_cls_name,
-            "0.26.0",
+            "0.45.0",
             (
                 f"Make sure use {self_cls_name[4:]} instead by setting"
                 "LoRA layers to `self.{to_q,to_k,to_v,add_k_proj,add_v_proj,to_out[0]}.lora_layer` respectively. This will be done automatically when using"
@@ -1773,7 +1773,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg
         self_cls_name = self.__class__.__name__
         deprecate(
             self_cls_name,
-            "0.26.0",
+            "0.45.0",
             (
                 f"Make sure use {self_cls_name[4:]} instead by setting"
                 "LoRA layers to `self.{to_q,to_k,to_v,add_k_proj,add_v_proj,to_out[0]}.lora_layer` respectively. This will be done automatically when using"
diff --git a/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py b/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py
index 1ba2d67ab..ec9c5e4c4 100644
--- a/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py
+++ b/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py
@@ -688,10 +688,10 @@ def to(self, *args, **kwargs):
 
         paddle_dtype = kwargs.pop("paddle_dtype", None)
         if paddle_dtype is not None:
-            deprecate("paddle_dtype", "0.35.0", "")
+            deprecate("paddle_dtype", "0.45.0", "")
         paddle_device = kwargs.pop("paddle_device", None)
         if paddle_device is not None:
-            deprecate("paddle_device", "0.35.0", "")
+            deprecate("paddle_device", "0.45.0", "")
 
         dtype_kwarg = kwargs.pop("dtype", None)
         device_kwarg = kwargs.pop("device", None)

From 910d9f4a2b5a2028c1e8a4174fe0fe4f09cfb1bf Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 16:18:39 +0800
Subject: [PATCH 06/11] update

---
 ...e_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh | 27 -------------------
 ...diffusion_3-dreambooth_lora_bs4_bf16_DP.sh | 27 -------------------
 ...e_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh | 27 -------------------
 ...diffusion_3-dreambooth_lora_bs4_bf16_DP.sh | 27 -------------------
 4 files changed, 108 deletions(-)
 delete mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
 delete mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
 delete mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
 delete mode 100644 tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh

diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
deleted file mode 100644
index f2a24ab7e..000000000
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-model_item=stable_diffusion_3-dreambooth_ft
-model=stable_diffusion_3
-bs_item=4
-fp_item=bf16
-run_mode=DP
-device_num=N1C1
-max_iter=1000
-num_workers=0
-
-# get data
-bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
-# run
-bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
deleted file mode 100644
index 4fb089122..000000000
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-model_item=stable_diffusion_3-dreambooth_lora
-model=stable_diffusion_3
-bs_item=4
-fp_item=bf16
-run_mode=DP
-device_num=N1C1
-max_iter=1000
-num_workers=0
-
-# get data
-bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
-# run
-bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
deleted file mode 100644
index 3c68cfbbe..000000000
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_bf16_DP.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-model_item=stable_diffusion_3-dreambooth_ft
-model=stable_diffusion_3
-bs_item=4
-fp_item=bf16
-run_mode=DP
-device_num=N1C8
-max_iter=1000
-num_workers=0
-
-# get data
-bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
-# run
-bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
deleted file mode 100644
index a8a1b5ea4..000000000
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_bf16_DP.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-model_item=stable_diffusion_3-dreambooth_lora
-model=stable_diffusion_3
-bs_item=4
-fp_item=bf16
-run_mode=DP
-device_num=N1C8
-max_iter=1000
-num_workers=0
-
-# get data
-bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
-# run
-bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1;

From 07482a90cfa94902b662ac2dadc43600905544a4 Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 16:28:09 +0800
Subject: [PATCH 07/11] trim

---
 .../stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh    |  2 +-
 .../stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh    |  2 +-
 .../stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh  |  2 +-
 .../stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh  |  2 +-
 .../stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh    |  2 +-
 .../stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh    |  2 +-
 .../stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh  |  2 +-
 .../stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh  |  2 +-
 .../dp/stable_diffusion_3/benchmark_common/prepare.sh  | 10 +---------
 .../benchmark_common/run_benchmark.sh                  |  8 --------
 10 files changed, 9 insertions(+), 25 deletions(-)

diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
index 998f2edbc..2d2242d30 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
index 1692f8a5d..1fc7d081b 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
index e69f24dec..6d561ee48 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
index c54b80e7e..13d90c9ba 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
index 1cfe3ce76..fe454c0ef 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
index 94025ce08..ded063935 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
index 88eccc249..686428ad5 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
index 82576d762..ead2da890 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
index 800f78eba..6b6dbdf7c 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,14 +31,6 @@ else
     echo "Directory dog already exists. Skipping download."
 fi
 
-# rm -rf stable-diffusion-3-medium-diffusers-paddle-init.tar.gz
-# rm -rf stable-diffusion-3-medium-diffusers-paddle-init
-# rm -rf dog.zip
-# rm -rf dog
-# wget https://bj.bcebos.com/paddlenlp/models/community/westfish/sd3_benchmark/stable-diffusion-3-medium-diffusers-paddle-init.tar.gz
-# tar -zxvf stable-diffusion-3-medium-diffusers-paddle-init.tar.gz
-# wget https://paddlenlp.bj.bcebos.com/models/community/westfish/develop-sdxl/dog.zip
-# unzip dog.zip
 
 RUN_SETUP=${RUN_SETUP:-"true"}
 if [ "$RUN_SETUP" = "true" ]; then
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
index 6399a7d77..7d6e3c26b 100644
--- a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
@@ -62,12 +62,6 @@ function _train(){
     export FLAG_FUSED_LINEAR=0
     export FLAGS_conv_workspace_size_limit=4096
 
-    # # 是否开启 ema
-    # export FLAG_USE_EMA=0
-    # # 是否开启 recompute
-    # export FLAG_RECOMPUTE=1
-    # # 是否开启 xformers
-    # export FLAG_XFORMERS=1
     export FLAGS_cudnn_deterministic=True
     env |grep FLAG
 
@@ -80,8 +74,6 @@ function _train(){
     ls;
     echo "------------"
 
-    # model_path=../ppdiffusers/examples/dreambooth/
-    # cd ${model_path}
 
     if [ ${model_item} = "stable_diffusion_3-dreambooth_ft" ];then
         train_cmd="

From eefc34602ac743c346ae6741d264aa7b48d2d490 Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 16:46:00 +0800
Subject: [PATCH 08/11] add sd3 infer bench

---
 .../deploy/sd3/infer_dygraph_paddle.py        | 273 ++++++++++++++
 ppdiffusers/deploy/sd3/infer_dygraph_torch.py | 334 ++++++++++++++++++
 .../deploy/sd3/scripts/benchmark_paddle.sh    |  32 ++
 .../deploy/sd3/scripts/benchmark_torch.sh     |  26 ++
 4 files changed, 665 insertions(+)
 create mode 100644 ppdiffusers/deploy/sd3/infer_dygraph_paddle.py
 create mode 100644 ppdiffusers/deploy/sd3/infer_dygraph_torch.py
 create mode 100644 ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh
 create mode 100644 ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh

diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py
new file mode 100644
index 000000000..5db7de8b3
--- /dev/null
+++ b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py
@@ -0,0 +1,273 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+import warnings
+
+import cv2
+import numpy as np
+import paddle
+from PIL import Image
+from tqdm.auto import trange
+
+from ppdiffusers import (
+    FlowMatchEulerDiscreteScheduler,
+    DDIMScheduler,
+    DDPMScheduler,
+    DEISMultistepScheduler,
+    DPMSolverMultistepScheduler,
+    DPMSolverSinglestepScheduler,
+    EulerAncestralDiscreteScheduler,
+    EulerDiscreteScheduler,
+    HeunDiscreteScheduler,
+    KDPM2AncestralDiscreteScheduler,
+    KDPM2DiscreteScheduler,
+    LMSDiscreteScheduler,
+    PNDMScheduler,
+    StableDiffusion3Pipeline,
+    UniPCMultistepScheduler,
+)
+from ppdiffusers.utils import load_image
+
+
+def get_canny_image(image, args):
+    if isinstance(image, Image.Image):
+        image = np.array(image)
+    image = cv2.Canny(image, args.low_threshold, args.high_threshold)
+    image = image[:, :, None]
+    image = np.concatenate([image, image, image], axis=2)
+    canny_image = Image.fromarray(image)
+    return canny_image
+
+
+def strtobool(v):
+    if isinstance(v, bool):
+        return v
+    if v.lower() in ("yes", "true", "t", "y", "1"):
+        return True
+    elif v.lower() in ("no", "false", "f", "n", "0"):
+        return False
+    else:
+        raise ValueError(
+            f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)."
+        )
+
+
+def change_scheduler(self, scheduler_type="ddim"):
+    self.orginal_scheduler_config = self.scheduler.config
+    scheduler_type = scheduler_type.lower()
+    if scheduler_type == "flow":
+        scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True)
+    elif scheduler_type == "pndm":
+        scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True)
+    elif scheduler_type == "lms":
+        scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "heun":
+        scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "euler":
+        scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "euler-ancestral":
+        scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "dpm-multi":
+        scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "dpm-single":
+        scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "kdpm2-ancestral":
+        scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "kdpm2":
+        scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "unipc-multi":
+        scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "ddim":
+        scheduler = DDIMScheduler.from_config(
+            self.orginal_scheduler_config,
+            steps_offset=1,
+            clip_sample=False,
+            set_alpha_to_one=False,
+        )
+    elif scheduler_type == "ddpm":
+        scheduler = DDPMScheduler.from_config(
+            self.orginal_scheduler_config,
+        )
+    elif scheduler_type == "deis-multi":
+        scheduler = DEISMultistepScheduler.from_config(
+            self.orginal_scheduler_config,
+        )
+    else:
+        raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+    return scheduler
+
+
+def parse_arguments():
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--pretrained_model_name_or_path",
+        type=str,
+        default="stabilityai/stable-diffusion-3-medium-diffusers",
+        help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).",
+    )
+    parser.add_argument(
+        "--inference_steps",
+        type=int,
+        default=50,
+        help="The number of unet inference steps.",
+    )
+    parser.add_argument(
+        "--benchmark_steps",
+        type=int,
+        default=10,
+        help="The number of performance benchmark steps.",
+    )
+    parser.add_argument(
+        "--task_name",
+        type=str,
+        default="all",
+        choices=[
+            "text2img",
+            "img2img",
+            "inpaint_legacy",
+            "all",
+        ],
+        help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ",
+    )
+    parser.add_argument(
+        "--parse_prompt_type",
+        type=str,
+        default="raw",
+        choices=[
+            "raw",
+            "lpw",
+        ],
+        help="The parse_prompt_type can be one of [raw, lpw]. ",
+    )
+    parser.add_argument("--use_fp16", type=strtobool, default=True, help="Wheter to use FP16 mode")
+    parser.add_argument(
+        "--attention_type", type=str, default="raw", choices=["raw", "cutlass", "flash", "all"], help="attention_type."
+    )
+    parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. -1 means use cpu")
+    parser.add_argument(
+        "--scheduler",
+        type=str,
+        default="euler-ancestral",
+        choices=[
+            "flow",
+            "pndm",
+            "lms",
+            "euler",
+            "euler-ancestral",
+            "dpm-multi",
+            "dpm-single",
+            "unipc-multi",
+            "ddim",
+            "ddpm",
+            "deis-multi",
+            "heun",
+            "kdpm2-ancestral",
+            "kdpm2",
+        ],
+        help="The scheduler type of stable diffusion.",
+    )
+    parser.add_argument("--height", type=int, default=512, help="Height of input image")
+    parser.add_argument("--width", type=int, default=512, help="Width of input image")
+    parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint")
+    return parser.parse_args()
+
+
+def main(args):
+
+    seed = 1024
+    paddle_dtype = paddle.float16 if args.use_fp16 else paddle.float32
+    pipe = StableDiffusion3Pipeline.from_pretrained(
+        args.pretrained_model_name_or_path,
+        safety_checker=None,
+        feature_extractor=None,
+        requires_safety_checker=False,
+        paddle_dtype=paddle_dtype,
+    )
+    scheduler = change_scheduler(pipe, args.scheduler)
+    pipe.scheduler = scheduler
+
+    if args.attention_type == "all":
+        args.attention_type = ["raw", "cutlass", "flash"]
+    else:
+        args.attention_type = [args.attention_type]
+
+    for attention_type in args.attention_type:
+        if attention_type == "raw":
+            pipe.disable_xformers_memory_efficient_attention()
+        else:
+            try:
+                pipe.enable_xformers_memory_efficient_attention(attention_type)
+            except Exception as e:
+                if attention_type == "flash":
+                    warnings.warn(
+                        "Attention type flash is not supported on your GPU! We need to use 3060、3070、3080、3090、4060、4070、4080、4090、A30、A100 etc."
+                    )
+                    continue
+                else:
+                    raise ValueError(e)
+
+        if not args.use_fp16 and attention_type == "flash":
+            print("Flash attention is not supported dtype=float32! Please use float16 or bfloat16. We will skip this!")
+            continue
+
+        width = args.width
+        height = args.height
+        pipe.set_progress_bar_config(disable=False)
+
+        folder = f"paddle_attn_{attention_type}_fp16" if args.use_fp16 else f"paddle_attn_{attention_type}_fp32"
+        os.makedirs(folder, exist_ok=True)
+        if args.task_name in ["text2img", "all"]:
+            init_image = load_image(
+                "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png"
+            )
+            # text2img
+            prompt = "bird"
+            time_costs = []
+            # warmup
+            pipe(
+                prompt,
+                num_inference_steps=10,
+                height=height,
+                width=width,
+            )
+            print("==> Test text2img performance.")
+            for step in trange(args.benchmark_steps):
+                start = time.time()
+                paddle.seed(seed)
+                images = pipe(
+                    prompt,
+                    num_inference_steps=args.inference_steps,
+                    height=height,
+                    width=width,
+                ).images
+                latency = time.time() - start
+                time_costs += [latency]
+                # print(f"No {step:3d} time cost: {latency:2f} s")
+            print(
+                f"Attention type: {attention_type}, "
+                f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+                f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+                f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+                f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+            )
+            images[0].save(f"{folder}/text2img.png")
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    main(args)
diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
new file mode 100644
index 000000000..b16869458
--- /dev/null
+++ b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
@@ -0,0 +1,334 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+
+import torch
+
+torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention
+delattr(torch.nn.functional, "scaled_dot_product_attention")
+
+import cv2
+import numpy as np
+from diffusers import (
+    FlowMatchEulerDiscreteScheduler,
+    DDIMScheduler,
+    DDPMScheduler,
+    DEISMultistepScheduler,
+    DPMSolverMultistepScheduler,
+    DPMSolverSinglestepScheduler,
+    EulerAncestralDiscreteScheduler,
+    EulerDiscreteScheduler,
+    HeunDiscreteScheduler,
+    KDPM2AncestralDiscreteScheduler,
+    KDPM2DiscreteScheduler,
+    LMSDiscreteScheduler,
+    PNDMScheduler,
+    StableDiffusion3Pipeline,
+    UniPCMultistepScheduler,
+)
+from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0
+from diffusers.utils import load_image
+from PIL import Image
+from tqdm.auto import trange
+
+
+def get_canny_image(image, args):
+    if isinstance(image, Image.Image):
+        image = np.array(image)
+    image = cv2.Canny(image, args.low_threshold, args.high_threshold)
+    image = image[:, :, None]
+    image = np.concatenate([image, image, image], axis=2)
+    canny_image = Image.fromarray(image)
+    return canny_image
+
+
+def strtobool(v):
+    if isinstance(v, bool):
+        return v
+    if v.lower() in ("yes", "true", "t", "y", "1"):
+        return True
+    elif v.lower() in ("no", "false", "f", "n", "0"):
+        return False
+    else:
+        raise ValueError(
+            f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)."
+        )
+
+
+def change_scheduler(self, scheduler_type="ddim"):
+    self.orginal_scheduler_config = self.scheduler.config
+    scheduler_type = scheduler_type.lower()
+    if scheduler_type == "flow":
+        scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True)
+    elif scheduler_type == "pndm":
+        scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True)
+    elif scheduler_type == "lms":
+        scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "heun":
+        scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "euler":
+        scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "euler-ancestral":
+        scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "dpm-multi":
+        scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "dpm-single":
+        scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "kdpm2-ancestral":
+        scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "kdpm2":
+        scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "unipc-multi":
+        scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config)
+    elif scheduler_type == "ddim":
+        scheduler = DDIMScheduler.from_config(
+            self.orginal_scheduler_config,
+            steps_offset=1,
+            clip_sample=False,
+            set_alpha_to_one=False,
+        )
+    elif scheduler_type == "ddpm":
+        scheduler = DDPMScheduler.from_config(
+            self.orginal_scheduler_config,
+        )
+    elif scheduler_type == "deis-multi":
+        scheduler = DEISMultistepScheduler.from_config(
+            self.orginal_scheduler_config,
+        )
+    else:
+        raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+    return scheduler
+
+
+def parse_arguments():
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--pretrained_model_name_or_path",
+        type=str,
+        default="stabilityai/stable-diffusion-3-medium-diffusers",
+        help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).",
+    )
+    parser.add_argument(
+        "--inference_steps",
+        type=int,
+        default=50,
+        help="The number of unet inference steps.",
+    )
+    parser.add_argument(
+        "--benchmark_steps",
+        type=int,
+        default=10,
+        help="The number of performance benchmark steps.",
+    )
+    parser.add_argument(
+        "--task_name",
+        type=str,
+        default="all",
+        choices=[
+            "text2img",
+            "img2img",
+            "inpaint_legacy",
+            "all",
+        ],
+        help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ",
+    )
+    parser.add_argument(
+        "--parse_prompt_type",
+        type=str,
+        default="raw",
+        choices=[
+            "raw",
+            "lpw",
+        ],
+        help="The parse_prompt_type can be one of [raw, lpw]. ",
+    )
+    parser.add_argument(
+        "--channels_last",
+        type=strtobool,
+        default=False,
+        help="Wheter to use channels_last",
+    )
+    parser.add_argument("--use_fp16", type=strtobool, default=True, help="Wheter to use FP16 mode")
+    parser.add_argument("--tf32", type=strtobool, default=True, help="tf32")
+    parser.add_argument("--compile", type=strtobool, default=False, help="compile")
+    parser.add_argument(
+        "--attention_type",
+        type=str,
+        default="sdp",
+        choices=[
+            "raw",
+            "sdp",
+        ],
+        help="attention_type.",
+    )
+    parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. -1 means use cpu")
+    parser.add_argument(
+        "--scheduler",
+        type=str,
+        default="euler-ancestral",
+        choices=[
+            "flow",
+            "pndm",
+            "lms",
+            "euler",
+            "euler-ancestral",
+            "dpm-multi",
+            "dpm-single",
+            "unipc-multi",
+            "ddim",
+            "ddpm",
+            "deis-multi",
+            "heun",
+            "kdpm2-ancestral",
+            "kdpm2",
+        ],
+        help="The scheduler type of stable diffusion.",
+    )
+    parser.add_argument("--height", type=int, default=512, help="Height of input image")
+    parser.add_argument("--width", type=int, default=512, help="Width of input image")
+    parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint")
+    return parser.parse_args()
+
+
+def attn_processors(self):
+    processors = {}
+
+    def fn_recursive_add_processors(name: str, module, processors):
+        if hasattr(module, "set_processor"):
+            processors[f"{name}.processor"] = module.processor
+
+        for sub_name, child in module.named_children():
+            fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
+
+        return processors
+
+    for name, module in self.named_children():
+        fn_recursive_add_processors(name, module, processors)
+
+    return processors
+
+
+def set_attn_processor(self, processor):
+    count = len(attn_processors(self).keys())
+
+    if isinstance(processor, dict) and len(processor) != count:
+        raise ValueError(
+            f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
+            f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
+        )
+
+    def fn_recursive_attn_processor(name: str, module, processor):
+        if hasattr(module, "set_processor"):
+            if not isinstance(processor, dict):
+                module.set_processor(processor)
+            else:
+                module.set_processor(processor.pop(f"{name}.processor"))
+
+        for sub_name, child in module.named_children():
+            fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
+
+    for name, module in self.named_children():
+        fn_recursive_attn_processor(name, module, processor)
+
+
+def main(args):
+    if args.tf32:
+        torch.backends.cuda.matmul.allow_tf32 = True
+    else:
+        torch.backends.cuda.matmul.allow_tf32 = False
+
+    seed = 1024
+    torch_dtype = torch.float16 if args.use_fp16 else torch.float32
+    pipe = StableDiffusion3Pipeline.from_pretrained(
+        args.pretrained_model_name_or_path,
+        safety_checker=None,
+        feature_extractor=None,
+        requires_safety_checker=False,
+        torch_dtype=torch_dtype,
+    )
+    scheduler = change_scheduler(pipe, args.scheduler)
+    pipe.scheduler = scheduler
+    if args.device_id >= 0:
+        pipe.to(f"cuda:{args.device_id}")
+
+    if args.attention_type == "all":
+        args.attention_type = ["raw", "sdp"]
+    else:
+        args.attention_type = [args.attention_type]
+
+    for attention_type in args.attention_type:
+        attn_prrocessor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0
+        if attention_type == "sdp":
+            torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_
+        set_attn_processor(pipe.unet, attn_prrocessor_cls())
+        set_attn_processor(pipe.vae, attn_prrocessor_cls())
+
+        if args.channels_last:
+            pipe.unet.to(memory_format=torch.channels_last)
+
+        if args.compile:
+            print("Run torch compile")
+            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+
+        width = args.width
+        height = args.height
+        pipe.set_progress_bar_config(disable=False)
+
+        folder = f"torch_attn_{attention_type}_fp16" if args.use_fp16 else f"torch_attn_{attention_type}_fp32"
+        os.makedirs(folder, exist_ok=True)
+        if args.task_name in ["text2img", "all"]:
+            init_image = load_image(
+                "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png"
+            )
+            # text2img
+            prompt = "bird"
+            time_costs = []
+            # warmup
+            pipe(
+                prompt,
+                num_inference_steps=10,
+                height=height,
+                width=width,
+            )
+            print("==> Test text2img performance.")
+            for step in trange(args.benchmark_steps):
+                start = time.time()
+                torch.cuda.manual_seed(seed)
+                images = pipe(
+                    prompt,
+                    num_inference_steps=args.inference_steps,
+                    height=height,
+                    width=width,
+                ).images
+                latency = time.time() - start
+                time_costs += [latency]
+                # print(f"No {step:3d} time cost: {latency:2f} s")
+            print(
+                f"Attention type: {attention_type}, "
+                f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+                f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+                f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+                f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+            )
+            images[0].save(f"{folder}/text2img.png")
+
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    main(args)
diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh
new file mode 100644
index 000000000..a0c2d8d45
--- /dev/null
+++ b/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh
@@ -0,0 +1,32 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# attention raw fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention cutlass fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type cutlass --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention flash fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type flash --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+
+# attention raw fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention cutlass fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type cutlass --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention flash fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type flash --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
new file mode 100644
index 000000000..9ef75119d
--- /dev/null
+++ b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
@@ -0,0 +1,26 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# attention raw
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention sdp
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+
+# attention raw fp32
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention sdp fp32
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
\ No newline at end of file

From 684db2ed89775b0d9c504c63e06ac29e0b9213bd Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 17:21:11 +0800
Subject: [PATCH 09/11] disable attn overrides in sd3 torch bench, drop raw runs

---
 ppdiffusers/deploy/sd3/infer_dygraph_torch.py | 28 +++++++++----------
 .../deploy/sd3/scripts/benchmark_torch.sh     | 11 ++------
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
index b16869458..621e87f27 100644
--- a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
+++ b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
@@ -18,8 +18,8 @@
 
 import torch
 
-torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention
-delattr(torch.nn.functional, "scaled_dot_product_attention")
+# torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention
+# delattr(torch.nn.functional, "scaled_dot_product_attention")
 
 import cv2
 import numpy as np
@@ -272,18 +272,18 @@ def main(args):
         args.attention_type = [args.attention_type]
 
     for attention_type in args.attention_type:
-        attn_prrocessor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0
-        if attention_type == "sdp":
-            torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_
-        set_attn_processor(pipe.unet, attn_prrocessor_cls())
-        set_attn_processor(pipe.vae, attn_prrocessor_cls())
-
-        if args.channels_last:
-            pipe.unet.to(memory_format=torch.channels_last)
-
-        if args.compile:
-            print("Run torch compile")
-            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+        # attn_prrocessor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0
+        # if attention_type == "sdp":
+        #     torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_
+        # set_attn_processor(pipe.transformer, attn_prrocessor_cls())
+        # set_attn_processor(pipe.vae, attn_prrocessor_cls())
+
+        # if args.channels_last:
+        #     pipe.transformer.to(memory_format=torch.channels_last)
+
+        # if args.compile:
+        #     print("Run torch compile")
+        #     pipe.unet = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True)
 
         width = args.width
         height = args.height
diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
index 9ef75119d..24ca54337 100644
--- a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
+++ b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
@@ -12,15 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# attention raw
-python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+# sd3 do ot supprot attention raw
 
 # attention sdp
-python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
-
-
-# attention raw fp32
-python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 --pretrained_model_name_or_path /root/paddlejob/workspace/env_run/output/zhangxu/benchmark/frame_benchmark/pytorch/dynamic/PaddleMIX/models/diffusers/stable-diffusion-3-medium-diffusers
 
 # attention sdp fp32
-python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
\ No newline at end of file
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 --pretrained_model_name_or_path /root/paddlejob/workspace/env_run/output/zhangxu/benchmark/frame_benchmark/pytorch/dynamic/PaddleMIX/models/diffusers/stable-diffusion-3-medium-diffusers
\ No newline at end of file

From e28c3be02bd0c0f28316b812d95512cda8b64dbb Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Fri, 15 Nov 2024 17:26:53 +0800
Subject: [PATCH 10/11] Remove hard-coded model path from sd3 torch benchmark script

---
 ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
index 24ca54337..020c54969 100644
--- a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
+++ b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
@@ -15,7 +15,7 @@
 # sd3 do ot supprot attention raw
 
 # attention sdp
-python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 --pretrained_model_name_or_path /root/paddlejob/workspace/env_run/output/zhangxu/benchmark/frame_benchmark/pytorch/dynamic/PaddleMIX/models/diffusers/stable-diffusion-3-medium-diffusers
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 
 
 # attention sdp fp32
-python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 --pretrained_model_name_or_path /root/paddlejob/workspace/env_run/output/zhangxu/benchmark/frame_benchmark/pytorch/dynamic/PaddleMIX/models/diffusers/stable-diffusion-3-medium-diffusers
\ No newline at end of file
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10 
\ No newline at end of file

From a7b824c2ad1f437d59ec9c6c76cbd8d0b7051933 Mon Sep 17 00:00:00 2001
From: westfish <westfish@126.com>
Date: Wed, 27 Nov 2024 03:53:01 +0000
Subject: [PATCH 11/11] Remove get_canny_image from sd3 dygraph inference scripts

---
 ppdiffusers/deploy/sd3/infer_dygraph_paddle.py | 9 ---------
 ppdiffusers/deploy/sd3/infer_dygraph_torch.py  | 9 ---------
 2 files changed, 18 deletions(-)

diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py
index 5db7de8b3..14d1f5f24 100644
--- a/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py
+++ b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py
@@ -43,15 +43,6 @@
 from ppdiffusers.utils import load_image
 
 
-def get_canny_image(image, args):
-    if isinstance(image, Image.Image):
-        image = np.array(image)
-    image = cv2.Canny(image, args.low_threshold, args.high_threshold)
-    image = image[:, :, None]
-    image = np.concatenate([image, image, image], axis=2)
-    canny_image = Image.fromarray(image)
-    return canny_image
-
 
 def strtobool(v):
     if isinstance(v, bool):
diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
index 621e87f27..14c547b56 100644
--- a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
+++ b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
@@ -46,15 +46,6 @@
 from tqdm.auto import trange
 
 
-def get_canny_image(image, args):
-    if isinstance(image, Image.Image):
-        image = np.array(image)
-    image = cv2.Canny(image, args.low_threshold, args.high_threshold)
-    image = image[:, :, None]
-    image = np.concatenate([image, image, image], axis=2)
-    canny_image = Image.fromarray(image)
-    return canny_image
-
 
 def strtobool(v):
     if isinstance(v, bool):