[llava test bench]fix O2 amp_master_grad (#830)
pkhk-1 authored Nov 27, 2024
1 parent a75eff3 commit 309f631
Showing 13 changed files with 299 additions and 90 deletions.
41 changes: 41 additions & 0 deletions paddlemix/config/llava/pretrain_13b_benchmark.json
@@ -0,0 +1,41 @@
{
"model_name_or_path": "paddlemix/llava/vicuna-13b-v1.5",
"freeze_include": ["*llama*", "*lm_head*"],
"freeze_exclude": ["*llama.mm_projector*"],
"dataset": {
"train":[{"name": "chatml_dataset", "data_files": "./llava_bench_data/ScienceQA_val_500.json"}]
},
"mixtoken": false,
"output_dir": "./work_dirs/llava-v1.6-vicuna-13b_pretrain",
"overwrite_output_dir": true,
"recompute": true,
"per_device_train_batch_size": 16,
"gradient_accumulation_steps":1,
"per_device_eval_batch_size": 1,
"eval_accumulation_steps":1,
"group_by_modality_length": false,
"num_train_epochs": 3,
"learning_rate": 1e-03,
"warmup_ratio": 0.03,
"lr_scheduler_type": "cosine",
"logging_steps": 1,
"save_steps": 100,
"evaluation_strategy": "no",
"save_strategy": "no",
"max_length": 2048,
"bf16": true,
"fp16_opt_level": "O2",
"amp_master_grad": 1,
"do_train": true,
"do_eval": false,
"skip_memory_metrics": false,
"disable_tqdm": true,
"save_total_limit": 1,
"eval_steps": 120,
"tensor_parallel_degree": 1,
"sharding_parallel_degree": 8,
"sharding": "stage2",
"benchmark": true

}
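Note: in this pretrain config (and the identical 7b config below), the freeze_include / freeze_exclude globs decide which parameters stay trainable. A small illustration of how such include/exclude glob lists typically combine, not PaddleMIX's actual freezing code; the parameter names are hypothetical:

```python
# Illustration only: freeze anything matching an include glob unless it also
# matches an exclude glob, so pretraining updates just the mm_projector.
from fnmatch import fnmatch

freeze_include = ["*llama*", "*lm_head*"]
freeze_exclude = ["*llama.mm_projector*"]

# Hypothetical parameter names for the sake of the example.
params = [
    "llava.llama.layers.0.self_attn.q_proj.weight",
    "llava.llama.mm_projector.0.weight",
    "llava.lm_head.weight",
]

for name in params:
    frozen = any(fnmatch(name, p) for p in freeze_include) and not any(
        fnmatch(name, p) for p in freeze_exclude
    )
    print(f"{name}: {'frozen' if frozen else 'trainable'}")
# llava.llama.layers.0.self_attn.q_proj.weight: frozen
# llava.llama.mm_projector.0.weight: trainable
# llava.lm_head.weight: frozen
```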

40 changes: 40 additions & 0 deletions paddlemix/config/llava/pretrain_7b_benchmark.json
@@ -0,0 +1,40 @@
{
"model_name_or_path": "paddlemix/llava/vicuna-7b-v1.5",
"freeze_include": ["*llama*", "*lm_head*"],
"freeze_exclude": ["*llama.mm_projector*"],
"dataset": {
"train":[{"name": "chatml_dataset", "data_files": "./llava_bench_data/ScienceQA_val_500.json"}]
},
"mixtoken": false,
"output_dir": "./work_dirs/llava-v1.6-vicuna-7b_pretrain",
"overwrite_output_dir": true,
"recompute": true,
"per_device_train_batch_size": 16,
"gradient_accumulation_steps":1,
"per_device_eval_batch_size": 1,
"eval_accumulation_steps":1,
"group_by_modality_length": false,
"num_train_epochs": 3,
"learning_rate": 1e-03,
"warmup_ratio": 0.03,
"lr_scheduler_type": "cosine",
"logging_steps": 1,
"save_steps": 100,
"evaluation_strategy": "no",
"save_strategy": "no",
"max_length": 2048,
"bf16": true,
"fp16_opt_level": "O2",
"amp_master_grad": 1,
"do_train": true,
"do_eval": false,
"skip_memory_metrics": false,
"disable_tqdm": true,
"save_total_limit": 1,
"eval_steps": 120,
"tensor_parallel_degree": 1,
"sharding_parallel_degree": 8,
"sharding": "stage2",
"benchmark": true

}
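Both pretrain configs turn on bf16 with "fp16_opt_level": "O2" and "amp_master_grad": 1, the combination this commit fixes in the benchmark. A minimal sketch of how those flags map onto Paddle's AMP API, assuming Paddle 2.5+ where paddle.amp.decorate accepts master_grad; the actual wiring lives in the PaddleNLP Trainer:

```python
# Minimal sketch, not the Trainer code: O2 AMP with bf16 compute and FP32
# master weights/gradients (master_grad=True), as requested by the config.
import paddle

model = paddle.nn.Linear(1024, 1024)  # stand-in for the LLaVA model
opt = paddle.optimizer.AdamW(learning_rate=1e-3, parameters=model.parameters())

model, opt = paddle.amp.decorate(
    models=model,
    optimizers=opt,
    level="O2",          # fp16_opt_level
    dtype="bfloat16",    # bf16: true
    master_grad=True,    # amp_master_grad: 1
)

x = paddle.randn([16, 1024])
with paddle.amp.auto_cast(level="O2", dtype="bfloat16"):
    loss = model(x).mean()
loss.backward()
opt.step()
opt.clear_grad()
```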
51 changes: 51 additions & 0 deletions paddlemix/config/llava/v1_6/lora_sft_13b_argument_benchmark.json
@@ -0,0 +1,51 @@
{
"model_name_or_path": "liuhaotian/llava-v1.6-vicuna-13b",
"dataset": {
"train":[{"name": "chatml_dataset", "data_files": "./llava_bench_data/ScienceQA_val_500.json","chat_template":"./llava_bench_data/chat_template.json"}]
},
"mixtoken": false,
"output_dir": "./work_dirs/llava-v1.6-vicuna-13b_lora_sft",
"overwrite_output_dir": true,
"recompute": true,
"per_device_train_batch_size": 16,
"gradient_accumulation_steps": 1,
"per_device_eval_batch_size": 1,
"sharding": "stage2",
"tensor_parallel_degree": 1,
"sharding_parallel_degree": 8,
"eval_accumulation_steps":16,
"num_train_epochs": 3,
"learning_rate": 2e-04,
"mm_projector_lr": 2e-5,
"weight_decay": 0.0,
"warmup_ratio": 0.03,
"lr_scheduler_type": "cosine",
"logging_steps": 1,
"save_steps": 100,
"evaluation_strategy": "no",
"save_strategy": "no",
"max_length": 2048,
"bf16": true,
"fp16_opt_level": "O2",
"amp_master_grad": 1,
"do_train": true,
"do_eval": false,
"disable_tqdm": true,
"load_best_model_at_end": false,
"eval_with_do_generation": false,
"skip_memory_metrics": false,
"save_total_limit": 1,
"lora": true,
"lora_rank": 128,
"lora_alpha": 256,
"lora_dropout": 0.0,
"benchmark": true,
"lora_target_modules":["llama.layer.*q_proj.*",
"llama.layer.*k_proj.*",
"llama.layer.*v_proj.*",
"llama.layer.*gate_proj.*",
"llama.layer.*up_proj.*",
"llama.layer.*down_proj.*",
"llama.layer.*o_proj.*"]
}

51 changes: 51 additions & 0 deletions paddlemix/config/llava/v1_6/lora_sft_7b_argument_benchmark.json
@@ -0,0 +1,51 @@
{
"model_name_or_path": "liuhaotian/llava-v1.6-vicuna-7b",
"dataset": {
"train":[{"name": "chatml_dataset", "data_files": "./llava_bench_data/ScienceQA_val_500.json","chat_template":"./llava_bench_data/chat_template.json"}]
},
"mixtoken": false,
"output_dir": "./work_dirs/llava-v1.6-vicuna-7b_lora_sft",
"overwrite_output_dir": true,
"recompute": true,
"per_device_train_batch_size": 16,
"gradient_accumulation_steps": 1,
"per_device_eval_batch_size": 1,
"sharding": "stage2",
"tensor_parallel_degree": 1,
"sharding_parallel_degree": 8,
"eval_accumulation_steps":16,
"num_train_epochs": 3,
"learning_rate": 2e-04,
"mm_projector_lr": 2e-5,
"weight_decay": 0.0,
"warmup_ratio": 0.03,
"lr_scheduler_type": "cosine",
"logging_steps": 1,
"save_steps": 100,
"evaluation_strategy": "no",
"save_strategy": "no",
"max_length": 2048,
"bf16": true,
"fp16_opt_level": "O2",
"amp_master_grad": 1,
"do_train": true,
"do_eval": false,
"disable_tqdm": true,
"load_best_model_at_end": false,
"eval_with_do_generation": false,
"skip_memory_metrics": false,
"save_total_limit": 1,
"lora": true,
"lora_rank": 128,
"lora_alpha": 256,
"lora_dropout": 0.0,
"benchmark": true,
"lora_target_modules":["llama.layer.*q_proj.*",
"llama.layer.*k_proj.*",
"llama.layer.*v_proj.*",
"llama.layer.*gate_proj.*",
"llama.layer.*up_proj.*",
"llama.layer.*down_proj.*",
"llama.layer.*o_proj.*"]
}
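In both LoRA benchmark configs, the lora_target_modules entries are regular expressions matched against sublayer names, so "llama.layer.*q_proj.*" picks up every q_proj projection in the language model. A small self-contained illustration of that matching, not PaddleNLP's LoRA code; the layer names are hypothetical:

```python
# Illustration only: select layers whose names fully match one of the
# lora_target_modules regexes from the config above.
import re

patterns = [
    "llama.layer.*q_proj.*",
    "llama.layer.*k_proj.*",
    "llama.layer.*v_proj.*",
]

# Hypothetical sublayer names for the sake of the example.
names = [
    "llama.layers.0.self_attn.q_proj",
    "llama.layers.0.self_attn.k_proj",
    "llama.layers.0.mlp.gate_proj",
    "vision_tower.blocks.0.attn.qkv",
]

targets = [n for n in names if any(re.fullmatch(p, n) for p in patterns)]
print(targets)
# ['llama.layers.0.self_attn.q_proj', 'llama.layers.0.self_attn.k_proj']
```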

8 changes: 4 additions & 4 deletions paddlemix/examples/llava/pretrain.py
@@ -156,11 +156,11 @@ def main():
if training_args.benchmark:
total_effective_samples = total_samples * training_args.num_train_epochs
effective_samples_per_second = total_effective_samples / train_result.metrics["train_runtime"]
-# mem_gpu = (
-#     train_result.metrics["train_mem_gpu_peaked_delta"] + train_result.metrics["train_mem_gpu_alloc_delta"]
-# )
+mem_gpu = (
+    train_result.metrics["train_mem_gpu_peaked_delta"] + train_result.metrics["train_mem_gpu_alloc_delta"]
+)
 logger.info(f"Effective_samples_per_second: {effective_samples_per_second} ")
-# logger.info(f"train_mem_gpu_peaked: {int(mem_gpu/ (2**20))} MB")
+logger.info(f"train_mem_gpu_peaked: {int(mem_gpu/ (2**20))} MB")
logger.info("Benchmark done.")
else:
trainer.save_model(merge_tensor_parallel=training_args.tensor_parallel_degree > 1)
8 changes: 4 additions & 4 deletions paddlemix/tools/supervised_finetune.py
@@ -182,11 +182,11 @@ def main():
if training_args.benchmark:
total_effective_samples = total_samples * training_args.num_train_epochs
effective_samples_per_second = total_effective_samples / train_result.metrics["train_runtime"]
-# mem_gpu = (
-#     train_result.metrics["train_mem_gpu_peaked_delta"] + train_result.metrics["train_mem_gpu_alloc_delta"]
-# )
+mem_gpu = (
+    train_result.metrics["train_mem_gpu_peaked_delta"] + train_result.metrics["train_mem_gpu_alloc_delta"]
+)
 logger.info(f"Effective_samples_per_second: {effective_samples_per_second} ")
-# logger.info(f"train_mem_gpu_peaked: {int(mem_gpu/ (2**20))} MB")
+logger.info(f"train_mem_gpu_peaked: {int(mem_gpu/ (2**20))} MB")
logger.info("Benchmark done.")
else:
trainer.save_model(merge_tensor_parallel=training_args.tensor_parallel_degree > 1)
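The change in both pretrain.py and supervised_finetune.py re-enables the peak-GPU-memory report next to the throughput number. A tiny worked example of the two benchmark metrics, with made-up values:

```python
# Made-up numbers, only to show the arithmetic the benchmark code performs.
train_runtime = 600.0        # seconds, train_result.metrics["train_runtime"]
total_samples = 500          # ScienceQA_val_500.json
num_train_epochs = 3

total_effective_samples = total_samples * num_train_epochs
effective_samples_per_second = total_effective_samples / train_runtime
print(effective_samples_per_second)  # 2.5

# peaked_delta + alloc_delta, in bytes (30 GiB + 4 GiB here)
mem_gpu = 30 * 2**30 + 4 * 2**30
print(int(mem_gpu / (2**20)), "MB")  # 34816 MB
```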
@@ -15,7 +15,7 @@
model=llava
model_item=llava-v1.6-vicuna-13b-lora_sft
bs_item=16
-fp_item=bf16
+fp_item=bf16O2
run_mode=DP
device_num=N1C8
max_epochs=3
@@ -15,7 +15,7 @@
model=llava
model_item=llava-v1.6-vicuna-13b-pretrain
bs_item=16
-fp_item=bf16
+fp_item=bf16O2
run_mode=DP
device_num=N1C8
max_epochs=3
@@ -15,7 +15,7 @@
model=llava
model_item=llava-v1.6-vicuna-13b-sft
bs_item=16
-fp_item=bf16
+fp_item=bf16O2
run_mode=DP
device_num=N1C8
max_epochs=3
@@ -15,7 +15,7 @@
model=llava
model_item=llava-v1.6-vicuna-7b-lora_sft
bs_item=16
-fp_item=bf16
+fp_item=bf16O2
run_mode=DP
device_num=N1C8
max_epochs=3
@@ -15,7 +15,7 @@
model=llava
model_item=llava-v1.6-vicuna-7b-pretrain
bs_item=16
-fp_item=bf16
+fp_item=bf16O2
run_mode=DP
device_num=N1C8
max_epochs=3
@@ -15,7 +15,7 @@
model=llava
model_item=llava-v1.6-vicuna-7b-sft
bs_item=16
-fp_item=bf16
+fp_item=bf16O2
run_mode=DP
device_num=N1C8
max_epochs=3
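The shell benchmark scripts now tag the run as fp_item=bf16O2 instead of bf16, matching the O2 AMP level set in the JSON configs. A speculative sketch of how such a tag could be split into a dtype and an AMP level; the actual benchmark harness may parse it differently, and the O1 fallback below is an assumption:

```python
# Speculative helper, not part of the repo: split "bf16O2" -> ("bf16", "O2").
import re

def split_fp_item(fp_item):
    m = re.fullmatch(r"(bf16|fp16|fp32)(O[0-3])?", fp_item)
    if m is None:
        raise ValueError(f"unrecognized fp_item: {fp_item}")
    # Assume plain "bf16" means the default O1 AMP level.
    return m.group(1), m.group(2) or "O1"

print(split_fp_item("bf16O2"))  # ('bf16', 'O2')
print(split_fp_item("bf16"))    # ('bf16', 'O1')
```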