[rank0]: Traceback (most recent call last):
[rank0]: File "Pai-Megatron-Patch-0925/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_dense_and_moe_gqa.py", line 924, in <module>
[rank0]: main()
[rank0]: File "Pai-Megatron-Patch-0925/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_dense_and_moe_gqa.py", line 920, in main
[rank0]: check_hf_mg_forward(hf_model, mg_model, args)
[rank0]: File "/Pai-Megatron-Patch-0925/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_dense_and_moe_gqa.py", line 875, in check_hf_mg_forward
[rank0]: mglogits = mgmodel(input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/Pai-Megatron-Patch-0925/megatron_patch/model/qwen2/model.py", line 203, in forward
[rank0]: hidden_states = self.decoder(
[rank0]: ^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/Pai-Megatron-Patch-0925/megatron_patch/model/qwen2/transformer_block.py", line 402, in forward
[rank0]: hidden_states, context = layer(
[rank0]: ^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1603, in _call_impl
[rank0]: result = forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "Pai-Megatron-Patch-0925/megatron_patch/model/qwen2/transformer_layer.py", line 188, in forward
[rank0]: attention_output_with_bias = self.self_attention(
[rank0]: ^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1603, in _call_impl
[rank0]: result = forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "Pai-Megatron-Patch-0925/megatron_patch/model/qwen2/transformer/attention.py", line 320, in forward
[rank0]: core_attn_out = self.core_attention(
[rank0]: ^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/Pai-Megatron-Patch-0925/PAI-Megatron-LM-240718/megatron/core/transformer/custom_layers/transformer_engine.py", line 535, in forward
[rank0]: core_attn_out = super().forward(
[rank0]: ^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/transformer_engine-1.12.0.dev0-py3.11-linux-x86_64.egg/transformer_engine/pytorch/attention.py", line 7637, in forward
[rank0]: return self.flash_attention(
[rank0]: ^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "miniconda3/envs/torch2/lib/python3.11/site-packages/transformer_engine-1.12.0.dev0-py3.11-linux-x86_64.egg/transformer_engine/pytorch/attention.py", line 5076, in forward
[rank0]: output = func(
[rank0]: ^^^^^
[rank0]: TypeError: flash_attn_func() got an unexpected keyword argument 'block_table'
[rank0]:[W925 18:26:02.009275169 ProcessGroupNCCL.cpp:1168] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
E0925 18:26:04.038000 139626046395584 to
The call here should be taking the sbhd-layout, causal-mask, no-padding path.
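A quick way to confirm the version mismatch (a diagnostic sketch of my own, not from this report) is to check whether the installed flash-attn build actually exposes the `block_table` keyword that this Transformer Engine code path forwards. Only the flash-attn import names below are its public API; everything else is an assumption about this environment.

```python
# Diagnostic sketch (assumed environment): does the installed flash_attn_func
# accept the 'block_table' kwarg that transformer_engine 1.12.0.dev0 passes?
import inspect

import flash_attn
from flash_attn import flash_attn_func

print("flash-attn version:", flash_attn.__version__)
params = inspect.signature(flash_attn_func).parameters
print("'block_table' accepted:", "block_table" in params)
```

If this prints False, the installed flash-attn predates the keyword this Transformer Engine build forwards. Aligning flash-attn with the version range transformer_engine 1.12.0.dev0 expects, or setting `NVTE_FLASH_ATTN=0` so TE falls back to its fused/unfused attention backends, are possible workarounds (both untested against this setup).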