diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py new file mode 100644 index 000000000..14d1f5f24 --- /dev/null +++ b/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py @@ -0,0 +1,264 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import time +import warnings + +import cv2 +import numpy as np +import paddle +from PIL import Image +from tqdm.auto import trange + +from ppdiffusers import ( + FlowMatchEulerDiscreteScheduler, + DDIMScheduler, + DDPMScheduler, + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + KDPM2AncestralDiscreteScheduler, + KDPM2DiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusion3Pipeline, + UniPCMultistepScheduler, +) +from ppdiffusers.utils import load_image + + + +def strtobool(v): + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise ValueError( + f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)." 
+ ) + + +def change_scheduler(self, scheduler_type="ddim"): + self.orginal_scheduler_config = self.scheduler.config + scheduler_type = scheduler_type.lower() + if scheduler_type == "flow": + scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True) + elif scheduler_type == "pndm": + scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True) + elif scheduler_type == "lms": + scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "heun": + scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "euler": + scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "dpm-multi": + scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "dpm-single": + scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "kdpm2-ancestral": + scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "kdpm2": + scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "unipc-multi": + scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "ddim": + scheduler = DDIMScheduler.from_config( + self.orginal_scheduler_config, + steps_offset=1, + clip_sample=False, + set_alpha_to_one=False, + ) + elif scheduler_type == "ddpm": + scheduler = DDPMScheduler.from_config( + self.orginal_scheduler_config, + ) + elif scheduler_type == "deis-multi": + scheduler = DEISMultistepScheduler.from_config( + self.orginal_scheduler_config, + ) + else: + raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") + return scheduler + + +def parse_arguments(): + + parser = argparse.ArgumentParser() + parser.add_argument( + "--pretrained_model_name_or_path", + type=str, + default="stabilityai/stable-diffusion-3-medium-diffusers", + help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).", + ) + parser.add_argument( + "--inference_steps", + type=int, + default=50, + help="The number of unet inference steps.", + ) + parser.add_argument( + "--benchmark_steps", + type=int, + default=10, + help="The number of performance benchmark steps.", + ) + parser.add_argument( + "--task_name", + type=str, + default="all", + choices=[ + "text2img", + "img2img", + "inpaint_legacy", + "all", + ], + help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ", + ) + parser.add_argument( + "--parse_prompt_type", + type=str, + default="raw", + choices=[ + "raw", + "lpw", + ], + help="The parse_prompt_type can be one of [raw, lpw]. ", + ) + parser.add_argument("--use_fp16", type=strtobool, default=True, help="Wheter to use FP16 mode") + parser.add_argument( + "--attention_type", type=str, default="raw", choices=["raw", "cutlass", "flash", "all"], help="attention_type." + ) + parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. 
-1 means use cpu") + parser.add_argument( + "--scheduler", + type=str, + default="euler-ancestral", + choices=[ + "flow", + "pndm", + "lms", + "euler", + "euler-ancestral", + "dpm-multi", + "dpm-single", + "unipc-multi", + "ddim", + "ddpm", + "deis-multi", + "heun", + "kdpm2-ancestral", + "kdpm2", + ], + help="The scheduler type of stable diffusion.", + ) + parser.add_argument("--height", type=int, default=512, help="Height of input image") + parser.add_argument("--width", type=int, default=512, help="Width of input image") + parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint") + return parser.parse_args() + + +def main(args): + + seed = 1024 + paddle_dtype = paddle.float16 if args.use_fp16 else paddle.float32 + pipe = StableDiffusion3Pipeline.from_pretrained( + args.pretrained_model_name_or_path, + safety_checker=None, + feature_extractor=None, + requires_safety_checker=False, + paddle_dtype=paddle_dtype, + ) + scheduler = change_scheduler(pipe, args.scheduler) + pipe.scheduler = scheduler + + if args.attention_type == "all": + args.attention_type = ["raw", "cutlass", "flash"] + else: + args.attention_type = [args.attention_type] + + for attention_type in args.attention_type: + if attention_type == "raw": + pipe.disable_xformers_memory_efficient_attention() + else: + try: + pipe.enable_xformers_memory_efficient_attention(attention_type) + except Exception as e: + if attention_type == "flash": + warnings.warn( + "Attention type flash is not supported on your GPU! We need to use 3060、3070、3080、3090、4060、4070、4080、4090、A30、A100 etc." + ) + continue + else: + raise ValueError(e) + + if not args.use_fp16 and attention_type == "flash": + print("Flash attention is not supported dtype=float32! Please use float16 or bfloat16. We will skip this!") + continue + + width = args.width + height = args.height + pipe.set_progress_bar_config(disable=False) + + folder = f"paddle_attn_{attention_type}_fp16" if args.use_fp16 else f"paddle_attn_{attention_type}_fp32" + os.makedirs(folder, exist_ok=True) + if args.task_name in ["text2img", "all"]: + init_image = load_image( + "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png" + ) + # text2img + prompt = "bird" + time_costs = [] + # warmup + pipe( + prompt, + num_inference_steps=10, + height=height, + width=width, + ) + print("==> Test text2img performance.") + for step in trange(args.benchmark_steps): + start = time.time() + paddle.seed(seed) + images = pipe( + prompt, + num_inference_steps=args.inference_steps, + height=height, + width=width, + ).images + latency = time.time() - start + time_costs += [latency] + # print(f"No {step:3d} time cost: {latency:2f} s") + print( + f"Attention type: {attention_type}, " + f"Use fp16: {'true' if args.use_fp16 else 'false'}, " + f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, " + f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, " + f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s." + ) + images[0].save(f"{folder}/text2img.png") + + +if __name__ == "__main__": + args = parse_arguments() + main(args) diff --git a/ppdiffusers/deploy/sd3/infer_dygraph_torch.py b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py new file mode 100644 index 000000000..14c547b56 --- /dev/null +++ b/ppdiffusers/deploy/sd3/infer_dygraph_torch.py @@ -0,0 +1,325 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import time + +import torch + +# torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention +# delattr(torch.nn.functional, "scaled_dot_product_attention") + +import cv2 +import numpy as np +from diffusers import ( + FlowMatchEulerDiscreteScheduler, + DDIMScheduler, + DDPMScheduler, + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + KDPM2AncestralDiscreteScheduler, + KDPM2DiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusion3Pipeline, + UniPCMultistepScheduler, +) +from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0 +from diffusers.utils import load_image +from PIL import Image +from tqdm.auto import trange + + + +def strtobool(v): + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise ValueError( + f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)." + ) + + +def change_scheduler(self, scheduler_type="ddim"): + self.orginal_scheduler_config = self.scheduler.config + scheduler_type = scheduler_type.lower() + if scheduler_type == "flow": + scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True) + elif scheduler_type == "pndm": + scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True) + elif scheduler_type == "lms": + scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "heun": + scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "euler": + scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "dpm-multi": + scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "dpm-single": + scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "kdpm2-ancestral": + scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "kdpm2": + scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "unipc-multi": + scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config) + elif scheduler_type == "ddim": + scheduler = DDIMScheduler.from_config( + self.orginal_scheduler_config, + steps_offset=1, + clip_sample=False, + set_alpha_to_one=False, + ) + elif scheduler_type == "ddpm": + scheduler = DDPMScheduler.from_config( + self.orginal_scheduler_config, + ) + elif 
scheduler_type == "deis-multi": + scheduler = DEISMultistepScheduler.from_config( + self.orginal_scheduler_config, + ) + else: + raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") + return scheduler + + +def parse_arguments(): + + parser = argparse.ArgumentParser() + parser.add_argument( + "--pretrained_model_name_or_path", + type=str, + default="stabilityai/stable-diffusion-3-medium-diffusers", + help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).", + ) + parser.add_argument( + "--inference_steps", + type=int, + default=50, + help="The number of unet inference steps.", + ) + parser.add_argument( + "--benchmark_steps", + type=int, + default=10, + help="The number of performance benchmark steps.", + ) + parser.add_argument( + "--task_name", + type=str, + default="all", + choices=[ + "text2img", + "img2img", + "inpaint_legacy", + "all", + ], + help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ", + ) + parser.add_argument( + "--parse_prompt_type", + type=str, + default="raw", + choices=[ + "raw", + "lpw", + ], + help="The parse_prompt_type can be one of [raw, lpw]. ", + ) + parser.add_argument( + "--channels_last", + type=strtobool, + default=False, + help="Wheter to use channels_last", + ) + parser.add_argument("--use_fp16", type=strtobool, default=True, help="Wheter to use FP16 mode") + parser.add_argument("--tf32", type=strtobool, default=True, help="tf32") + parser.add_argument("--compile", type=strtobool, default=False, help="compile") + parser.add_argument( + "--attention_type", + type=str, + default="sdp", + choices=[ + "raw", + "sdp", + ], + help="attention_type.", + ) + parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. -1 means use cpu") + parser.add_argument( + "--scheduler", + type=str, + default="euler-ancestral", + choices=[ + "flow", + "pndm", + "lms", + "euler", + "euler-ancestral", + "dpm-multi", + "dpm-single", + "unipc-multi", + "ddim", + "ddpm", + "deis-multi", + "heun", + "kdpm2-ancestral", + "kdpm2", + ], + help="The scheduler type of stable diffusion.", + ) + parser.add_argument("--height", type=int, default=512, help="Height of input image") + parser.add_argument("--width", type=int, default=512, help="Width of input image") + parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint") + return parser.parse_args() + + +def attn_processors(self): + processors = {} + + def fn_recursive_add_processors(name: str, module, processors): + if hasattr(module, "set_processor"): + processors[f"{name}.processor"] = module.processor + + for sub_name, child in module.named_children(): + fn_recursive_add_processors(f"{name}.{sub_name}", child, processors) + + return processors + + for name, module in self.named_children(): + fn_recursive_add_processors(name, module, processors) + + return processors + + +def set_attn_processor(self, processor): + count = len(attn_processors(self).keys()) + + if isinstance(processor, dict) and len(processor) != count: + raise ValueError( + f"A dict of processors was passed, but the number of processors {len(processor)} does not match the" + f" number of attention layers: {count}. Please make sure to pass {count} processor classes." 
+ ) + + def fn_recursive_attn_processor(name: str, module, processor): + if hasattr(module, "set_processor"): + if not isinstance(processor, dict): + module.set_processor(processor) + else: + module.set_processor(processor.pop(f"{name}.processor")) + + for sub_name, child in module.named_children(): + fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor) + + for name, module in self.named_children(): + fn_recursive_attn_processor(name, module, processor) + + +def main(args): + if args.tf32: + torch.backends.cuda.matmul.allow_tf32 = True + else: + torch.backends.cuda.matmul.allow_tf32 = False + + seed = 1024 + torch_dtype = torch.float16 if args.use_fp16 else torch.float32 + pipe = StableDiffusion3Pipeline.from_pretrained( + args.pretrained_model_name_or_path, + safety_checker=None, + feature_extractor=None, + requires_safety_checker=False, + torch_dtype=torch_dtype, + ) + scheduler = change_scheduler(pipe, args.scheduler) + pipe.scheduler = scheduler + if args.device_id >= 0: + pipe.to(f"cuda:{args.device_id}") + + if args.attention_type == "all": + args.attention_type = ["raw", "sdp"] + else: + args.attention_type = [args.attention_type] + + for attention_type in args.attention_type: + # attn_prrocessor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0 + # if attention_type == "sdp": + # torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_ + # set_attn_processor(pipe.transformer, attn_prrocessor_cls()) + # set_attn_processor(pipe.vae, attn_prrocessor_cls()) + + # if args.channels_last: + # pipe.transformer.to(memory_format=torch.channels_last) + + # if args.compile: + # print("Run torch compile") + # pipe.unet = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True) + + width = args.width + height = args.height + pipe.set_progress_bar_config(disable=False) + + folder = f"torch_attn_{attention_type}_fp16" if args.use_fp16 else f"torch_attn_{attention_type}_fp32" + os.makedirs(folder, exist_ok=True) + if args.task_name in ["text2img", "all"]: + init_image = load_image( + "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png" + ) + # text2img + prompt = "bird" + time_costs = [] + # warmup + pipe( + prompt, + num_inference_steps=10, + height=height, + width=width, + ) + print("==> Test text2img performance.") + for step in trange(args.benchmark_steps): + start = time.time() + torch.cuda.manual_seed(seed) + images = pipe( + prompt, + num_inference_steps=args.inference_steps, + height=height, + width=width, + ).images + latency = time.time() - start + time_costs += [latency] + # print(f"No {step:3d} time cost: {latency:2f} s") + print( + f"Attention type: {attention_type}, " + f"Use fp16: {'true' if args.use_fp16 else 'false'}, " + f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, " + f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, " + f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s." + ) + images[0].save(f"{folder}/text2img.png") + + + +if __name__ == "__main__": + args = parse_arguments() + main(args) diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh new file mode 100644 index 000000000..a0c2d8d45 --- /dev/null +++ b/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh @@ -0,0 +1,32 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# attention raw fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention cutlass fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type cutlass --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention flash fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type flash --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+
+# attention raw fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention cutlass fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type cutlass --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention flash fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type flash --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
diff --git a/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
new file mode 100644
index 000000000..020c54969
--- /dev/null
+++ b/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
@@ -0,0 +1,21 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# sd3 does not support attention raw
+
+# attention sdp fp16
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention sdp fp32
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
\ No newline at end of file
diff --git a/ppdiffusers/ppdiffusers/loaders/deprecate.py b/ppdiffusers/ppdiffusers/loaders/deprecate.py
index 2b88f5aeb..e1e72424b 100644
--- a/ppdiffusers/ppdiffusers/loaders/deprecate.py
+++ b/ppdiffusers/ppdiffusers/loaders/deprecate.py
@@ -19,7 +19,7 @@ def text_encoder_lora_state_dict(text_encoder):
     deprecate(
         "text_encoder_load_state_dict in `models`",
-        "0.27.0",
+        "0.45.0",
         "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0.
Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.", ) state_dict = {} @@ -45,7 +45,7 @@ def text_encoder_lora_state_dict(text_encoder): def text_encoder_attn_modules(text_encoder): deprecate( "text_encoder_attn_modules in `models`", - "0.27.0", + "0.45.0", "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.", ) from ppdiffusers.transformers import CLIPTextModel, CLIPTextModelWithProjection diff --git a/ppdiffusers/ppdiffusers/models/attention_processor.py b/ppdiffusers/ppdiffusers/models/attention_processor.py index 3d6f1659b..3d20e0140 100644 --- a/ppdiffusers/ppdiffusers/models/attention_processor.py +++ b/ppdiffusers/ppdiffusers/models/attention_processor.py @@ -372,7 +372,7 @@ def set_processor(self, processor: "AttnProcessor", _remove_lora: bool = False) if not USE_PEFT_BACKEND and hasattr(self, "processor") and _remove_lora and self.to_q.lora_layer is not None: deprecate( "set_processor to offload LoRA", - "0.26.0", + "0.45.0", "In detail, removing LoRA layers via calling `set_default_attn_processor` is deprecated. Please make sure to call `pipe.unload_lora_weights()` instead.", ) # TODO(Patrick, Sayak) - this can be deprecated once PEFT LoRA integration is complete @@ -1645,7 +1645,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg self_cls_name = self.__class__.__name__ deprecate( self_cls_name, - "0.26.0", + "0.45.0", ( f"Make sure use {self_cls_name[4:]} instead by setting" "LoRA layers to `self.{to_q,to_k,to_v,to_out[0]}.lora_layer` respectively. This will be done automatically when using" @@ -1724,7 +1724,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg self_cls_name = self.__class__.__name__ deprecate( self_cls_name, - "0.26.0", + "0.45.0", ( f"Make sure use {self_cls_name[4:]} instead by setting" "LoRA layers to `self.{to_q,to_k,to_v,add_k_proj,add_v_proj,to_out[0]}.lora_layer` respectively. This will be done automatically when using" @@ -1783,7 +1783,7 @@ def __call__(self, attn: Attention, hidden_states: paddle.Tensor, *args, **kwarg self_cls_name = self.__class__.__name__ deprecate( self_cls_name, - "0.26.0", + "0.45.0", ( f"Make sure use {self_cls_name[4:]} instead by setting" "LoRA layers to `self.{to_q,to_k,to_v,add_k_proj,add_v_proj,to_out[0]}.lora_layer` respectively. 
This will be done automatically when using" diff --git a/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py b/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py index 907a92776..80debd760 100644 --- a/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py +++ b/ppdiffusers/ppdiffusers/peft/tuners/lora/layer.py @@ -165,7 +165,7 @@ def reset_lora_parameters(self, adapter_name, init_lora_weights): else: raise ValueError(f"Unknown initialization {init_lora_weights=}") nn.init.zeros_(self.lora_B[adapter_name].weight) - if adapter_name in self.lora_embedding_A.keys(): + if adapter_name in dict(self.lora_embedding_A).keys(): # initialize a the same way as the default for nn.linear and b to zero nn.init.zeros_(self.lora_embedding_A[adapter_name]) nn.init.normal_(self.lora_embedding_B[adapter_name]) diff --git a/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py b/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py index 66467f94c..5f643a017 100644 --- a/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py +++ b/ppdiffusers/ppdiffusers/peft/tuners/tuners_utils.py @@ -416,6 +416,8 @@ def set_adapter(self, adapter_names: str | list[str]) -> None: # Deactivate grads on the inactive adapter and activate grads on the active adapter for layer_name in self.adapter_layer_names: module_dict = getattr(self, layer_name) + if isinstance(module_dict, paddle.nn.ParameterDict): + module_dict = dict(module_dict) for key, layer in module_dict.items(): if key in adapter_names: # Note: It is possible that not a single layer is called with requires_grad_(True) here. This may diff --git a/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py b/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py index 1ba2d67ab..ec9c5e4c4 100644 --- a/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py +++ b/ppdiffusers/ppdiffusers/pipelines/pipeline_utils.py @@ -688,10 +688,10 @@ def to(self, *args, **kwargs): paddle_dtype = kwargs.pop("paddle_dtype", None) if paddle_dtype is not None: - deprecate("paddle_dtype", "0.35.0", "") + deprecate("paddle_dtype", "0.45.0", "") paddle_device = kwargs.pop("paddle_device", None) if paddle_device is not None: - deprecate("paddle_device", "0.35.0", "") + deprecate("paddle_device", "0.45.0", "") dtype_kwarg = kwargs.pop("dtype", None) device_kwarg = kwargs.pop("device", None) diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh new file mode 100644 index 000000000..2d2242d30 --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +model_item=stable_diffusion_3-dreambooth_ft +model=stable_diffusion_3 +bs_item=1 +fp_item=fp16 +run_mode=DP +device_num=N1C1 +max_iter=1000 +num_workers=0 + +# get data +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh +# run +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh new file mode 100644 index 000000000..1fc7d081b --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +model_item=stable_diffusion_3-dreambooth_ft +model=stable_diffusion_3 +bs_item=4 +fp_item=fp16 +run_mode=DP +device_num=N1C1 +max_iter=1000 +num_workers=0 + +# get data +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh +# run +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh new file mode 100644 index 000000000..6d561ee48 --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +model_item=stable_diffusion_3-dreambooth_lora +model=stable_diffusion_3 +bs_item=1 +fp_item=fp16 +run_mode=DP +device_num=N1C1 +max_iter=1000 +num_workers=0 + +# get data +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh +# run +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh new file mode 100644 index 000000000..13d90c9ba --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C1/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +model_item=stable_diffusion_3-dreambooth_lora +model=stable_diffusion_3 +bs_item=4 +fp_item=fp16 +run_mode=DP +device_num=N1C1 +max_iter=1000 +num_workers=0 + +# get data +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh +# run +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh new file mode 100644 index 000000000..fe454c0ef --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs1_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +model_item=stable_diffusion_3-dreambooth_ft +model=stable_diffusion_3 +bs_item=1 +fp_item=fp16 +run_mode=DP +device_num=N1C8 +max_iter=1000 +num_workers=0 + +# get data +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh +# run +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh new file mode 100644 index 000000000..ded063935 --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_ft_bs4_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +model_item=stable_diffusion_3-dreambooth_ft +model=stable_diffusion_3 +bs_item=4 +fp_item=fp16 +run_mode=DP +device_num=N1C8 +max_iter=1000 +num_workers=0 + +# get data +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh +# run +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh new file mode 100644 index 000000000..686428ad5 --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs1_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +model_item=stable_diffusion_3-dreambooth_lora +model=stable_diffusion_3 +bs_item=1 +fp_item=fp16 +run_mode=DP +device_num=N1C8 +max_iter=1000 +num_workers=0 + +# get data +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh +# run +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh new file mode 100644 index 000000000..ead2da890 --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/N1C8/stable_diffusion_3-dreambooth_lora_bs4_fp16_DP.sh @@ -0,0 +1,27 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +model_item=stable_diffusion_3-dreambooth_lora +model=stable_diffusion_3 +bs_item=4 +fp_item=fp16 +run_mode=DP +device_num=N1C8 +max_iter=1000 +num_workers=0 + +# get data +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh +# run +bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py new file mode 100644 index 000000000..7606e8adc --- /dev/null +++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py @@ -0,0 +1,155 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
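+
+# Log analyzer for the SD3 TIPC benchmark: extracts it/s and loss values from the
+# training log, smooths the loss with an exponential moving average, and writes a
+# single benchmark-record JSON to the result log file.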
+
+import json
+import os
+import re
+import sys
+
+from numpy import mean
+
+
+class TimeAnalyzer(object):
+    def __init__(self, filename, keyword=None, loss_keyword=None):
+        if filename is None:
+            raise Exception("Please specify the filename!")
+
+        if keyword is None:
+            raise Exception("Please specify the keyword!")
+
+        self.filename = filename
+        self.keyword = keyword
+        self.loss_keyword = loss_keyword
+
+    def get_ips(self):
+        ips_list = []
+        loss_list = []
+        loss_value = None
+        with open(self.filename, "r") as f_object:
+            lines = f_object.read().splitlines()
+            for line in lines:
+                if self.keyword not in line:
+                    continue
+                try:
+                    # Extract ips (iterations per second).
+                    ips_match = re.search(r'(\d+\.\d+)it/s', line)
+                    if ips_match:
+                        ips = float(ips_match.group(1))
+                        ips_list.append(ips)
+
+                    # Extract the loss value.
+                    loss_match = re.search(r'loss=(\d+\.\d+)', line)
+                    if loss_match:
+                        loss = float(loss_match.group(1))
+                        loss_list.append(loss)
+                        loss_value = loss
+                except Exception as exc:
+                    print("line is: {}; failed".format(line))
+                    print("Exception: {}".format(exc))
+        if loss_value is None:
+            loss_value = -1
+
+        def ewma(data, alpha):
+            # Exponentially weighted moving average of the loss curve.
+            smoothed_data = []
+            for i, value in enumerate(data):
+                if i == 0:
+                    smoothed_data.append(value)
+                else:
+                    smoothed_value = alpha * value + (1 - alpha) * smoothed_data[-1]
+                    smoothed_data.append(smoothed_value)
+            return smoothed_data
+
+        smoothed_loss = ewma(loss_list, 0.9)[-1] if loss_list else -1
+        return mean(ips_list[4:]), loss_value, smoothed_loss
+
+
+def analyze(model_item, log_file, res_log_file, device_num, bs, fp_item):
+    analyzer = TimeAnalyzer(log_file, 'Steps:', None)
+    ips, convergence_value, smoothed_value = analyzer.get_ips()
+    ips = round(ips, 3)
+    ngpus = int(re.findall(r"\d+", device_num)[-1])
+    batch_size = int(re.findall(r"\d+", str(bs))[-1])
+    print("----ips: ", ips, "ngpus", ngpus, "batch_size", batch_size)
+    # Convert per-iteration throughput to samples/sec across all cards.
+    ips *= batch_size
+    ips *= ngpus
+    run_mode = "DP"
+
+    model_name = model_item + "_" + "bs" + str(bs) + "_" + fp_item + "_" + run_mode
+    info = {
+        "model_branch": os.getenv("model_branch"),
+        "model_commit": os.getenv("model_commit"),
+        "model_name": model_name,
+        "batch_size": bs,
+        "fp_item": fp_item,
+        "run_mode": run_mode,
+        "convergence_value": convergence_value,
+        "smoothed_value": smoothed_value,
+        "convergence_key": "",
+        "ips": ips,
+        "speed_unit": "sample/sec",
+        "device_num": device_num,
+        "model_run_time": os.getenv("model_run_time"),
+        "frame_commit": "",
+        "frame_version": os.getenv("frame_version"),
+    }
+    json_info = json.dumps(info)
+    print(json_info)
+    with open(res_log_file, "w") as of:
+        of.write(json_info)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 7:
+        print("Usage: " + sys.argv[0] + " model_item log_file res_log_file device_num bs fp_item")
+        sys.exit()
+
+    model_item = sys.argv[1]
+    log_file = sys.argv[2]
+    res_log_file = sys.argv[3]
+    device_num = sys.argv[4]
+    bs = int(sys.argv[5])
+    fp_item = sys.argv[6]
+
+    analyze(model_item, log_file, res_log_file, device_num, bs, fp_item)
\ No newline at end of file
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
new file mode 100644
index 000000000..6b6dbdf7c
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/prepare.sh
@@ -0,0 +1,59 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+if [ ! -d "stable-diffusion-3-medium-diffusers-paddle-init" ]; then
+    echo "Downloading stable-diffusion-3-medium-diffusers-paddle-init.tar.gz..."
+    wget https://bj.bcebos.com/paddlenlp/models/community/westfish/sd3_benchmark/stable-diffusion-3-medium-diffusers-paddle-init.tar.gz
+    echo "Extracting stable-diffusion-3-medium-diffusers-paddle-init.tar.gz..."
+    tar -zxvf stable-diffusion-3-medium-diffusers-paddle-init.tar.gz
+else
+    echo "Directory stable-diffusion-3-medium-diffusers-paddle-init already exists. Skipping download."
+fi
+
+if [ ! -d "dog" ]; then
+    echo "Downloading dog.zip..."
+    wget https://paddlenlp.bj.bcebos.com/models/community/westfish/develop-sdxl/dog.zip
+    echo "Unzipping dog.zip..."
+    unzip dog.zip
+else
+    echo "Directory dog already exists. Skipping download."
+fi
+
+
+RUN_SETUP=${RUN_SETUP:-"true"}
+if [ "$RUN_SETUP" = "true" ]; then
+    echo "Running setup and installation steps..."
+
+    export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
+    python -m pip install --upgrade pip
+    # python -m pip install paddlepaddle-gpu==3.0.0b2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
+    python -m pip install einops
+    python -m pip install -r ../requirements.txt
+    python -m pip install --upgrade pybind11 regex sentencepiece tqdm visualdl attrdict easydict pyyaml paddlesde
+    python -m pip install paddlenlp==3.0.0b2
+    python -m pip install huggingface-hub==0.23.0
+
+    # uninstall any released ppdiffusers and install the develop ppdiffusers from source
+    python -m pip uninstall -y ppdiffusers
+    cd ../ppdiffusers/
+    python -m pip install -e .
+    cd -
+    cd ../ppdiffusers/examples/dreambooth
+    pip install -r requirements_sd3.txt
+    cd -
+    python -m pip list
+else
+    echo "fast mode, skipping setup and installation steps as RUN_SETUP is set to false."
+fi
diff --git a/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
new file mode 100644
index 000000000..7d6e3c26b
--- /dev/null
+++ b/tests/test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/run_benchmark.sh
@@ -0,0 +1,188 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test training benchmark for a model.
+# Usage: bash benchmark/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num}
+function _set_params(){
+    model_item=${1:-"stable_diffusion_3-dreambooth_ft"}   # (required) model item, e.g. fastscnn|segformer_b0|ocrnet_hrnetw48
+    base_batch_size=${2:-"1"}       # (required) for single-process static-graph runs this is the per-card batch size; multiply by the card count at training time
+    fp_item=${3:-"fp32"}            # (required) fp32|fp16|bf16
+    run_mode=${4:-"DP"}             # (required) MP model parallel | DP data parallel | PP pipeline parallel | hybrid parallel DP1-MP1-PP1 | DP1-MP4-PP1
+    device_num=${5:-"N1C1"}         # (required) number of cards used, N1C1|N1C8|N4C32 (4 nodes, 32 cards)
+    profiling=${PROFILING:-"false"} # (required) profiling switch, off by default, passed in as a global variable
+
+    model_repo="PaddleMIX"          # (required) name of the model suite
+    speed_unit="sample/sec"         # (required) unit of the speed metric
+    skip_steps=0                    # (required) log parsing: number of unstable warm-up steps to skip
+    keyword="ips:"                  # (required) log parsing: keyword marking the lines that contain performance data
+    convergence_key="loss:"         # (optional) log parsing: keyword marking the lines that contain convergence data, e.g. convergence_key="loss:"
+    max_iter=${6:-"20"}             # (optional) keep the run within about 5 minutes; if code changes are needed to stop early, submit a PR to the suite; may also be max_epoch
+    num_workers=${7:-"5"}           # (optional)
+    is_large_model=False            # (optional) False for ordinary models; set to True for large models where only a single ips value is taken
+
+    # The following is generic launch logic; usually no changes are needed.
+    model_name=${model_item}_bs${base_batch_size}_${fp_item}_${run_mode}  # (required) do not change the format; keep it aligned with the competitor model names
+    device=${CUDA_VISIBLE_DEVICES//,/ }
+    arr=(${device})
+    num_gpu_devices=${#arr[*]}
+    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # (required) TRAIN_LOG_DIR is set as a global variable by the benchmark framework
+    profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)}  # (required) PROFILING_LOG_DIR is set as a global variable by the benchmark framework
+    speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
+
+    train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
+    profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
+    speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
+}
+
+function _train(){
+    batch_size=${base_batch_size}  # for multi-card single-process runs, compute the multi-card batch size here in _train
+    echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
+    if [ ${profiling} = "true" ];then
+        add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
+        log_file=${profiling_log_file}
+    else
+        add_options=""
+        log_file=${train_log_file}
+    fi
+
+    # native dynamic graph
+    export FLAG_FUSED_LINEAR=0
+    export FLAGS_conv_workspace_size_limit=4096
+
+    export FLAGS_cudnn_deterministic=True
+    env |grep FLAG
+
+    if [ ${fp_item} = "fp32" ]; then
+        fp_item_cmd="no"
+    else
+        fp_item_cmd=${fp_item}
+    fi
+    echo "------------"
+    ls;
+    echo "------------"
+
+
+    if [ ${model_item} = "stable_diffusion_3-dreambooth_ft" ];then
+        train_cmd="
+            ../ppdiffusers/examples/dreambooth/train_dreambooth_sd3.py \
+            --pretrained_model_name_or_path=stable-diffusion-3-medium-diffusers-paddle-init \
+            --instance_data_dir=dog \
+            --output_dir=trained-sd3 \
+            --mixed_precision=${fp_item_cmd} \
+            --instance_prompt=a-photo-of-sks-dog \
+            --resolution=512 \
+            --train_batch_size=${batch_size} \
+            --gradient_accumulation_steps=4 \
+            --learning_rate=5e-5 \
            --report_to=tensorboard \
+            --lr_scheduler=constant \
+            --lr_warmup_steps=0 \
+            --max_train_steps=${max_iter} \
+            --validation_prompt=A-photo-of-sks-dog-in-a-bucket \
+            --validation_epochs=100 \
+            --num_validation_images 1 \
+            --seed=0 \
+            --checkpointing_steps=10000
+            "
+    else
+        export USE_PEFT_BACKEND=True
+        train_cmd="
+            ../ppdiffusers/examples/dreambooth/train_dreambooth_lora_sd3.py \
+            --pretrained_model_name_or_path=stable-diffusion-3-medium-diffusers-paddle-init \
+            --instance_data_dir=dog \
+            --output_dir=trained-sd3-lora \
+            --mixed_precision=${fp_item_cmd} \
+            --instance_prompt=a-photo-of-sks-dog \
+            --resolution=512 \
+            --train_batch_size=${batch_size} \
+            --gradient_accumulation_steps=4 \
+            --learning_rate=5e-5 \
+            --report_to=tensorboard \
+            --lr_scheduler=constant \
+            --lr_warmup_steps=0 \
+            --max_train_steps=${max_iter} \
+            --validation_prompt=A-photo-of-sks-dog-in-a-bucket \
+            --validation_epochs=100 \
+            --num_validation_images 1 \
+            --seed=0 \
+            --checkpointing_steps=10000
+            "
+    fi
+
+    # The following is generic launch logic; usually no changes are needed.
+    case ${run_mode} in
+    DP) if [[ ${device_num} = "N1C1" ]];then
+            echo "run ${run_mode} "
+            train_cmd="python -u ${train_cmd}"
+        else
+            rm -rf ./mylog   # remove the old log directory before launching
+            train_cmd="python -u -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES \
+                ${train_cmd}"
+        fi
+        ;;
+    DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1" ;;
+    *) echo "choose run_mode "; exit 1;
+    esac
+
+    echo "train_cmd: ${train_cmd}  log_file: ${log_file}"
+    RUN_SLOW=${RUN_SLOW:-"true"}
+    if [ "$RUN_SLOW" = "true" ]; then
+        timeout 30m ${train_cmd} > ${log_file} 2>&1
+    else
+        echo "fast mode, only run 3m"
+        timeout 3m ${train_cmd} > ${log_file} 2>&1
+    fi
+    # eval ${train_cmd}
+    # eval "timeout 30m ${train_cmd} > ${log_file} 2>&1"
+    if [ $? -ne 0 ];then
+        echo -e "${model_name}, FAIL"
+    else
+        echo -e "${model_name}, SUCCESS"
+    fi
+    # kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
+
+    if [ ${device_num} != "N1C1" -a -d mylog ]; then
+        rm ${log_file}
+        cp mylog/workerlog.0 ${log_file}
+    fi
+    echo ${train_cmd} >> ${log_file}
+    cat ${log_file}
+}
+
+function _analysis_log(){
+    # cd -
+    analysis_log_cmd="python test_tipc/dygraph/dp/stable_diffusion_3/benchmark_common/analysis_log.py \
+        ${model_item} ${log_file} ${speed_log_file} ${device_num} ${base_batch_size} ${fp_item}"
+    echo ${analysis_log_cmd}
+    eval ${analysis_log_cmd}
+}
+
+_set_params $@
+str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
+export frame_version=${str_tmp%%.post*}
+export frame_commit=$(echo `python -c "import paddle;print(paddle.version.commit)"`)
+export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3`
+export model_commit=$(git log|head -n1|awk '{print $2}')
+echo "---------frame_version is ${frame_version}"
+echo "---------Paddle commit is ${frame_commit}"
+echo "---------Model commit is ${model_commit}"
+echo "---------model_branch is ${model_branch}"
+
+job_bt=`date '+%Y%m%d%H%M%S'`
+_train
+job_et=`date '+%Y%m%d%H%M%S'`
+export model_run_time=$((${job_et}-${job_bt}))
+_analysis_log
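+
+# Overall flow: _set_params derives the run/profiling/speed log paths from the CLI
+# arguments and benchmark environment variables, _train launches the DreamBooth
+# full-finetune or LoRA training command under a timeout and captures its log, and
+# _analysis_log turns that log into a benchmark JSON record via analysis_log.py.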