Created
December 30, 2023 05:48
-
-
Save qianwch/ed3446d0b6f6dea9044e4b492e498fca to your computer and use it in GitHub Desktop.
Mindformers-qwen-log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero. | |
setattr(self, word, getattr(machar, word).flat[0]) | |
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero. | |
setattr(self, word, getattr(machar, word).flat[0]) | |
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/jieba/_compat.py:18: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html | |
import pkg_resources | |
[WARNING] Distributed Communication has not been inited. Use default RANK_SIZE: 1 | |
[WARNING] Distributed Communication has not been inited. Use default RANK_SIZE: 1 | |
[WARNING] Distributed Communication has not been inited. Use default RANK_ID: 0 | |
[WARNING] Distributed Communication has not been inited. Use default RANK_ID: 0 | |
[WARNING] HCCL_ADPT(87237,ffffb3047930,python):2023-12-30-00:14:29.436.847 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:63] GenHcclOptions] The environment variable DEPLOY_MODE is not set. Now set to default value 0 | |
2023-12-30 00:14:29,672 - mindformers[mindformers/tools/utils.py:153] - INFO - set output path to './output' | |
2023-12-30 00:14:29,673 - mindformers[mindformers/trainer/trainer.py:176] - INFO - set output_dir from args:dict | |
2023-12-30 00:14:29,701 - mindformers[mindformers/trainer/base_trainer.py:85] - INFO - Now Running Task is: text_generation, Model is: qwen_14b | |
2023-12-30 00:14:29,701 - mindformers[mindformers/trainer/base_trainer.py:126] - WARNING - Input model name is not in the supported list or unspecified. | |
2023-12-30 00:14:29,702 - mindformers[mindformers/trainer/base_trainer.py:127] - WARNING - See the list of supported task and model name: OrderedDict([('general', OrderedDict([('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/general/run_general_task.yaml')])), ('masked_image_modeling', OrderedDict([('mae_vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/mae/run_mae_vit_base_p16_224_800ep.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/mae/run_mae_vit_base_p16_224_800ep.yaml')])), ('image_classification', OrderedDict([('vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml'), ('swin_base_p4w7', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/swin/run_swin_base_p4w7_224_100ep.yaml'), ('mindspore/vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml'), ('mindspore/swin_base_p4w7', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/swin/run_swin_base_p4w7_224_100ep.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml')])), ('fill_mask', OrderedDict([('bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_base_uncased.yaml'), ('bert_tiny_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_tiny_uncased.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_tiny_uncased.yaml')])), ('contrastive_language_image_pretrain', OrderedDict([('clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml'), ('blip2_stage1_vit_g', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_qformer_pretrain.yaml'), ('blip2_stage2_vit_g_baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_baichuan_7b.yaml'), ('blip2_stage2_vit_g_llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_llama_7b.yaml'), ('mindspore/clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml'), ('clip_vit_b_16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_16_pretrain_flickr8k.yaml'), ('clip_vit_l_14', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14_pretrain_flickr8k.yaml'), ('clip_vit_l_14@336', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14@336_pretrain_flickr8k.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml')])), ('image_to_text_retrieval', OrderedDict([('blip2_stage1_evaluator', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_retrieval_flickr30k.yaml')])), ('zero_shot_image_classification', OrderedDict([('clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml'), ('mindspore/clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_b_16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_16_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_l_14', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_l_14@336', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14@336_zero_shot_image_classification_cifar100.yaml'), ('blip2_stage1_classification', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_zero_shot_image_classification_cifar100.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml')])), ('image_to_text_generation', OrderedDict([('itt_blip2_stage2_vit_g_baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_baichuan_7b_image_to_text_generation.yaml'), ('itt_blip2_stage2_vit_g_llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_llama_7b_image_to_text_generation.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_blip2_stage2_vit_g_llama_7b_image_to_text_generation.yaml')])), ('translation', OrderedDict([('t5_small', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_small_on_wmt16.yaml'), ('t5_tiny', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_tiny_on_wmt16.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_small_on_wmt16.yaml')])), ('text_classification', OrderedDict([('txtcls_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased.yaml'), ('txtcls_bert_base_uncased_mnli', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml'), ('mindspore/txtcls_bert_base_uncased_mnli', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml'), ('gpt2_txtcls', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_txtcls.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased.yaml')])), ('token_classification', OrderedDict([('tokcls_bert_base_chinese', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese.yaml'), ('tokcls_bert_base_chinese_cluener', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese_cluener.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese.yaml')])), ('question_answering', OrderedDict([('qa_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('qa_bert_base_uncased_squad', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('mindspore/qa_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml')])), ('text_generation', OrderedDict([('gpt2', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml'), ('gpt2_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_lora.yaml'), ('gpt2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_13b.yaml'), ('gpt2_52b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_52b.yaml'), ('gpt2_xl', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_xl.yaml'), ('gpt2_xl_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_xl_lora.yaml'), ('llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_7b.yaml'), ('llama_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_13b.yaml'), ('llama_65b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_65b.yaml'), ('llama2_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_7b.yaml'), ('llama2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_13b.yaml'), ('llama2_70b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_70b.yaml'), ('codellama_34b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/codellama/run_codellama_34b_910b.yaml'), ('llama_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_7b_lora.yaml'), ('pangualpha_2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/pangualpha/run_pangualpha_2_6b.yaml'), ('pangualpha_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/pangualpha/run_pangualpha_13b.yaml'), ('glm_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_finetune.yaml'), ('glm_6b_chat', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_infer.yaml'), ('glm_6b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_lora.yaml'), ('glm_6b_lora_chat', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_lora_infer.yaml'), ('glm2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b.yaml'), ('glm2_6b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b_lora.yaml'), ('glm2_6b_ptuning2', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b_ptuning2.yaml'), ('glm3_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm3/run_glm3_6b.yaml'), ('codegeex2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/codegeex2/run_codegeex2_6b.yaml'), ('bloom_560m', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_560m.yaml'), ('bloom_7.1b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_7.1b.yaml'), ('bloom_65b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_65b.yaml'), ('bloom_176b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_176b.yaml'), ('baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan/run_baichuan_7b.yaml'), ('baichuan2_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan2/run_baichuan2_7b.yaml'), ('baichuan2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan2/run_baichuan2_13b.yaml'), ('ziya_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/ziya/run_ziya_13b.yaml'), ('skywork_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/skywork/run_skywork_13b.yaml'), ('internlm_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/internlm/run_internlm_7b.yaml'), ('internlm_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/internlm/run_internlm_7b_lora.yaml'), ('qwen_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/qwen/run_qwen_7b.yaml'), ('qwen_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/qwen/run_qwen_7b_lora.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml')])), ('segment_anything', OrderedDict([('sam_vit_b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-b.yaml'), ('sam_vit_l', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-l.yaml'), ('sam_vit_h', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-h.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-h.yaml')]))]) | |
2023-12-30 00:14:29,702 - mindformers[mindformers/trainer/base_trainer.py:128] - WARNING - The default model config: /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml will now be used for the text_generation task | |
2023-12-30 00:14:29,703 - mindformers[mindformers/core/parallel_config.py:51] - INFO - initial parallel_config from dict: {'data_parallel': 1, 'model_parallel': 4, 'pipeline_stage': 1, 'use_seq_parallel': False, 'micro_batch_num': 4, 'vocab_emb_dp': True, 'gradient_aggregation_group': 4} | |
2023-12-30 00:14:29,703 - mindformers[mindformers/trainer/base_trainer.py:191] - INFO - The current parallel mode is semi_auto_parallel, full batch is True,so global batch size will be changed: global_batch_size = batch_size * data_parallel * micro_batch_interleave_num * gradient_accumulation_steps = 1 = 1 * 1 * 1 * 1 | |
2023-12-30 00:14:29,703 - mindformers[mindformers/trainer/base_trainer.py:371] - INFO - .........Build Network From Config.......... | |
2023-12-30 00:14:29,704 - mindformers[mindformers/models/llama/llama_config.py:177] - WARNING - Argument `use_past_shard` is deprecated. | |
2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:60] - INFO - The Cell Reuse compilation acceleration feature is not supported when the environment variable ENABLE_CELL_REUSE is 0 or MindSpore version is earlier than 2.1.0 or stand_alone mode or pipeline_stages <= 1 | |
2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:64] - INFO - | |
The current ENABLE_CELL_REUSE=0, please set the environment variable as follows: | |
export ENABLE_CELL_REUSE=1 to enable the Cell Reuse compilation acceleration feature. | |
2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:73] - INFO - The Cell Reuse compilation acceleration feature only works in pipeline parallel mode(pipeline_stage>1).Current pipeline stage=1, the feature is disabled by default. | |
2023-12-30 00:14:29,710 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:29,712 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:30,357 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:30,362 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:30,364 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:14:30.369.919 [mindspore/common/parameter.py:786] This interface may be deleted in the future. | |
2023-12-30 00:14:30,372 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:30,373 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:31,018 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:31,027 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:31,029 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:31,037 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:31,039 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:31,680 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:31,684 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:31,686 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:31,693 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:31,695 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:32,337 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:32,340 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:32,342 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:32,350 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:32,351 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:32,992 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:32,996 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:32,998 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:33,005 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:33,007 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:33,647 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:33,651 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:33,653 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:33,660 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:33,662 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:34,305 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:34,309 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:34,311 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:34,318 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:34,320 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:34,961 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:34,965 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:34,966 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:34,974 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:34,975 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:35,617 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:35,621 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:35,623 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:35,630 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:35,632 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:36,271 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:36,275 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:36,277 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:36,284 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:36,286 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:36,926 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:36,930 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:36,932 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:36,940 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:36,941 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:37,582 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:37,585 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:37,587 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:37,595 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:37,596 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:38,238 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:38,242 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:38,244 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:38,252 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:38,253 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:38,900 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:38,904 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:38,906 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:38,913 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:38,915 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:39,560 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:39,564 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:39,566 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:39,573 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:39,575 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:40,230 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:40,234 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:40,236 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:40,245 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:40,247 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:40,892 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:40,896 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:40,898 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:40,905 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:40,907 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:41,553 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:41,557 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:41,559 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:41,566 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:41,567 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:42,209 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:42,213 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:42,215 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:42,222 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:42,223 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:42,869 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:42,873 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:42,875 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:42,882 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:42,884 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:43,529 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:43,533 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:43,535 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:43,542 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:43,544 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:44,189 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:44,193 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:44,195 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:44,202 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:44,204 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:44,849 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:44,853 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:44,855 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:44,862 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:44,864 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:45,505 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:45,514 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:45,516 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:45,524 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:45,525 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:46,166 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:46,170 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:46,172 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:46,179 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:46,180 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:46,825 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:46,829 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:46,831 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:46,838 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:46,840 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:47,486 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:47,489 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:47,491 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:47,498 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:47,500 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:48,145 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:48,148 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:48,150 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:48,158 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:48,159 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:48,811 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:48,815 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:48,817 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:48,824 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:48,826 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:49,466 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:49,470 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:49,472 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:49,479 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:49,481 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:50,126 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:50,130 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:50,132 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:50,141 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:50,142 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:50,796 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:50,800 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:50,802 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:50,809 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:50,811 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:51,461 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:51,465 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:51,467 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:51,474 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:51,476 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:52,117 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:52,121 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:52,123 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:52,130 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:52,131 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:52,777 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:52,781 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:52,782 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:52,790 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:52,791 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:53,438 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:53,441 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:53,443 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:53,451 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:53,452 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:54,094 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:54,098 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:54,100 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:54,107 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:54,109 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:54,756 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:54,760 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:54,762 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:54,769 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:54,771 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:55,420 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:55,424 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:55,425 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:55,433 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:55,434 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:56,077 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:56,081 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell. | |
2023-12-30 00:14:56,083 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:57,063 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version. | |
2023-12-30 00:14:57,078 - mindformers[mindformers/models/base_model.py:117] - INFO - model built, but weights is unloaded, since the config has no checkpoint_name_or_path attribute or checkpoint_name_or_path is None. | |
2023-12-30 00:14:57,093 - mindformers[mindformers/trainer/base_trainer.py:515] - INFO - Network Parameters: 14167 M. | |
[WARNING] DEVICE(87237,ffffb3047930,python):2023-12-30-00:14:58.432.866 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:103] Initialize] Reserved memory size for other components(1073741824) is less than recommend size(2145292800), It may lead to Out Of Memory in HCCL or other components, Please double check context key 'variable_memory_max_size'/'max_device_memory' | |
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero. | |
setattr(self, word, getattr(machar, word).flat[0]) | |
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero. | |
setattr(self, word, getattr(machar, word).flat[0]) | |
[WARNING] PRE_ACT(87237,ffffb3047930,python):2023-12-30-00:16:38.750.638 [mindspore/ccsrc/backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.cc:84] IncreaseAllgatherFusionId] Increase the duplicated allgather fusion id | |
2023-12-30 00:17:49,800 - mindformers[mindformers/trainer/utils.py:596] - INFO - .............Start load checkpoint from checkpoint.................. | |
2023-12-30 00:17:49,801 - mindformers[mindformers/trainer/utils.py:245] - INFO - When distributed loads are sliced weights,load_checkpoint should be a checkpoint directory containing the directory of rank_{0-*},The directory structure is as follows: **checkpoint_root_dir/rank_{0-*}/**.ckpt | |
2023-12-30 00:19:26,982 - mindformers[mindformers/trainer/utils.py:258] - INFO - Distribute load is success. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:29.643.743 [mindspore/train/serialization.py:172] The type of transformer.wte.embedding_weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.611.345 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.814.382 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.865.933 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.210.12 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.116.387 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.206.440 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.302.318 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.534.612 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.565.13 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.558.620 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.906.088 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.112.506 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.210.534 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.304.827 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.397.993 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.508.718 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.604.662 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.886.504 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:36.427.475 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.186.58 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.385.227 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.598.188 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.687.603 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.802.418 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.897.706 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.916. [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.935.86 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.389.393 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.992.544 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:39.532.962 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:39.876.857 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.685.51 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.159.963 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.280.464 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.384.605 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.416.777 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.508.013 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.606.761 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.960.073 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.315.835 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.621.338 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.748.162 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.854.197 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.886.329 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.978.509 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.107.24 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.103.064 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.190.918 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.544.984 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.941.873 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.233.123 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.355.428 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.445.002 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.476.827 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.562.141 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.594.368 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.686.504 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.776.271 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.112.113 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.466.775 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.767.148 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.886.560 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.977.884 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.291.9 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.913.29 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.122.321 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.214.277 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.284.537 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.619.414 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.956.637 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.242.113 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.377.666 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.471.024 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.501.071 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.593.557 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.620.175 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.710.899 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.793.061 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.143.461 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.473.350 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.628.073 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.753.922 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.846.836 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.875.535 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.967.234 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.999.337 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.890.48 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.165.084 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.681.108 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.976.043 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.317.928 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.479.756 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.553.552 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.626.140 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.686.796 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.766.473 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.827.273 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.909.200 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.279.948 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.646.072 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.943.834 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.680.35 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.162.104 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.198.512 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.302.291 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.336.603 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.426.555 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.515.263 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.876.965 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.235.903 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.496.045 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.578.253 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.644.899 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.676.079 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.767.318 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.790.577 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.852.412 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.909.901 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.117.267 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.306.410 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.471.388 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.555.691 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.622.197 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.640.009 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.694.637 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.734.799 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.823.553 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.904.138 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.132.502 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.475.075 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.837.669 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.973.434 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.637.53 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.115.734 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.167.208 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.207.468 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.300.499 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.382.921 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.592.604 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.955.980 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.304.945 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.463.094 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.524.711 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.564.815 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.660.107 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.692.217 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.757.487 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.474.1 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.364.427 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.663.272 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.919.954 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.964.04 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.163.756 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.203.870 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.293.580 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.331.781 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.423.184 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.504.562 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.795.691 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.387.846 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.634.479 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.758.031 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.816.496 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.855.170 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.946.318 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.558.38 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.116.309 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.210.795 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.513.187 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.726.708 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.961.412 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.112.011 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.177.320 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.222.916 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.312.605 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.349.532 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.412.024 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.506.050 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.748.346 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.973.622 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.128.179 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.229.669 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.302.768 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.345.515 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.406.901 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.449.507 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.536.945 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.619.002 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.827.554 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.911.18 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.325.664 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.451.490 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.544.533 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.576.664 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.654.106 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.688.260 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.780.785 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.877.374 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.202.281 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.540.706 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.753.268 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.850.076 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.911.505 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.951.761 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.479.51 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.808.51 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.144.028 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.227.934 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.507.191 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.741.909 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.913.295 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.151.02 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.110.182 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.143.177 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.207.493 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.240.738 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.310.663 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.394.765 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.622.853 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.852.884 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.108.13 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.113.861 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.202.209 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.235.192 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.294.596 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.344.243 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.438.072 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.545.969 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.783.346 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.710.9 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.163.652 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.293.650 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.397.280 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.430.064 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.486.836 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.519.502 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.581.809 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.693.016 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.936.416 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.163.681 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.320.441 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.449.952 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.543.720 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.588.307 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.683.133 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.715.993 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.778.900 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.891.674 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.117.625 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.356.864 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.629.171 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.753.828 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.844.225 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.881.703 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.968.741 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.408. [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.680.79 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.154.774 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.497.490 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.847.382 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.138.687 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.269.221 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.360.666 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.400.769 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.493.261 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.528.398 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.621.801 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.710.419 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.638.50 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.445.030 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.701.809 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.826.937 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.904.706 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.944.969 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.344.63 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.699.51 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.158.195 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.264.919 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.485.471 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.705.548 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.867.368 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.993.167 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.834.85 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.130.802 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.220.610 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.259.697 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.352.288 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.455.665 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.691.411 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.925.196 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.939.77 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.224.727 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.315.400 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.347.918 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.401.128 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.450.668 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.543.591 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.646.280 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.846.158 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.331.09 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.184.317 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.300.414 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.389.959 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.429.820 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.520.308 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.567.747 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.657.636 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.728.735 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.928.203 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.114.830 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.277.994 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.404.092 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.580.551 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.607.950 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.704.398 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.740.571 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.829.036 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.905.601 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.272.387 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.625.457 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.923.697 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.301.02 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.885.52 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.123.411 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.212.266 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.241.343 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.332.399 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.407.361 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.715.249 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.914.293 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.674.88 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.188.920 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.268.550 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.300.973 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.385.320 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.419.537 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.506.842 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.585.341 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.769.195 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.967.155 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.122.703 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.233.893 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.319.927 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.354.878 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.458.756 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.485.193 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.572.608 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.640.210 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.840.375 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.256.54 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.172.301 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.307.490 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.395.371 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.430.829 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.515.615 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.548.733 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.635.346 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.702.520 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.901.627 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.903.65 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.246.005 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.360.536 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.419.082 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.455.698 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.540.870 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.570.835 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.665.868 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.733.669 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.922.977 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.111.864 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.263.407 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.414.406 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.501.113 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.533.688 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.616.836 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.640.137 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.795.118 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.926.389 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.303.492 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.664.528 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.809.632 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.927.201 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.211.40 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.514.75 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.141.832 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.163.909 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.226.285 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.387.562 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.636.010 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.901.214 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.500.45 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.167.980 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.253.882 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.285.260 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.373.861 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.402.797 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.504.672 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.677.928 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.975.418 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:29.212.831 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:30.718.306 [mindspore/train/serialization.py:172] The type of lm_head.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.310.992 [mindspore/train/serialization.py:1317] For 'load_param_into_net', 80 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.369 [mindspore/train/serialization.py:1322] transformer.layers.0.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.462 [mindspore/train/serialization.py:1322] transformer.layers.0.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.530 [mindspore/train/serialization.py:1322] transformer.layers.1.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.616 [mindspore/train/serialization.py:1322] transformer.layers.1.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.673 [mindspore/train/serialization.py:1322] transformer.layers.2.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.727 [mindspore/train/serialization.py:1322] transformer.layers.2.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.782 [mindspore/train/serialization.py:1322] transformer.layers.3.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.834 [mindspore/train/serialization.py:1322] transformer.layers.3.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.886 [mindspore/train/serialization.py:1322] transformer.layers.4.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.939 [mindspore/train/serialization.py:1322] transformer.layers.4.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.993 [mindspore/train/serialization.py:1322] transformer.layers.5.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.044 [mindspore/train/serialization.py:1322] transformer.layers.5.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.114 [mindspore/train/serialization.py:1322] transformer.layers.6.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.167 [mindspore/train/serialization.py:1322] transformer.layers.6.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.218 [mindspore/train/serialization.py:1322] transformer.layers.7.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.269 [mindspore/train/serialization.py:1322] transformer.layers.7.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.320 [mindspore/train/serialization.py:1322] transformer.layers.8.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.370 [mindspore/train/serialization.py:1322] transformer.layers.8.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.418 [mindspore/train/serialization.py:1322] transformer.layers.9.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.467 [mindspore/train/serialization.py:1322] transformer.layers.9.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.515 [mindspore/train/serialization.py:1322] transformer.layers.10.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.564 [mindspore/train/serialization.py:1322] transformer.layers.10.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.612 [mindspore/train/serialization.py:1322] transformer.layers.11.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.660 [mindspore/train/serialization.py:1322] transformer.layers.11.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.709 [mindspore/train/serialization.py:1322] transformer.layers.12.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.768 [mindspore/train/serialization.py:1322] transformer.layers.12.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.821 [mindspore/train/serialization.py:1322] transformer.layers.13.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.871 [mindspore/train/serialization.py:1322] transformer.layers.13.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.920 [mindspore/train/serialization.py:1322] transformer.layers.14.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.969 [mindspore/train/serialization.py:1322] transformer.layers.14.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.017 [mindspore/train/serialization.py:1322] transformer.layers.15.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.065 [mindspore/train/serialization.py:1322] transformer.layers.15.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.113 [mindspore/train/serialization.py:1322] transformer.layers.16.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.161 [mindspore/train/serialization.py:1322] transformer.layers.16.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.209 [mindspore/train/serialization.py:1322] transformer.layers.17.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.256 [mindspore/train/serialization.py:1322] transformer.layers.17.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.304 [mindspore/train/serialization.py:1322] transformer.layers.18.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.351 [mindspore/train/serialization.py:1322] transformer.layers.18.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.398 [mindspore/train/serialization.py:1322] transformer.layers.19.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.445 [mindspore/train/serialization.py:1322] transformer.layers.19.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.494 [mindspore/train/serialization.py:1322] transformer.layers.20.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.542 [mindspore/train/serialization.py:1322] transformer.layers.20.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.591 [mindspore/train/serialization.py:1322] transformer.layers.21.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.639 [mindspore/train/serialization.py:1322] transformer.layers.21.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.687 [mindspore/train/serialization.py:1322] transformer.layers.22.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.734 [mindspore/train/serialization.py:1322] transformer.layers.22.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.782 [mindspore/train/serialization.py:1322] transformer.layers.23.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.831 [mindspore/train/serialization.py:1322] transformer.layers.23.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.887 [mindspore/train/serialization.py:1322] transformer.layers.24.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.937 [mindspore/train/serialization.py:1322] transformer.layers.24.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.986 [mindspore/train/serialization.py:1322] transformer.layers.25.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.034 [mindspore/train/serialization.py:1322] transformer.layers.25.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.083 [mindspore/train/serialization.py:1322] transformer.layers.26.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.131 [mindspore/train/serialization.py:1322] transformer.layers.26.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.179 [mindspore/train/serialization.py:1322] transformer.layers.27.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.226 [mindspore/train/serialization.py:1322] transformer.layers.27.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.273 [mindspore/train/serialization.py:1322] transformer.layers.28.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.321 [mindspore/train/serialization.py:1322] transformer.layers.28.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.368 [mindspore/train/serialization.py:1322] transformer.layers.29.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.416 [mindspore/train/serialization.py:1322] transformer.layers.29.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.463 [mindspore/train/serialization.py:1322] transformer.layers.30.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.511 [mindspore/train/serialization.py:1322] transformer.layers.30.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.558 [mindspore/train/serialization.py:1322] transformer.layers.31.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.605 [mindspore/train/serialization.py:1322] transformer.layers.31.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.653 [mindspore/train/serialization.py:1322] transformer.layers.32.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.699 [mindspore/train/serialization.py:1322] transformer.layers.32.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.746 [mindspore/train/serialization.py:1322] transformer.layers.33.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.793 [mindspore/train/serialization.py:1322] transformer.layers.33.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.839 [mindspore/train/serialization.py:1322] transformer.layers.34.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.886 [mindspore/train/serialization.py:1322] transformer.layers.34.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.940 [mindspore/train/serialization.py:1322] transformer.layers.35.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.992 [mindspore/train/serialization.py:1322] transformer.layers.35.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.043 [mindspore/train/serialization.py:1322] transformer.layers.36.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.093 [mindspore/train/serialization.py:1322] transformer.layers.36.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.141 [mindspore/train/serialization.py:1322] transformer.layers.37.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.190 [mindspore/train/serialization.py:1322] transformer.layers.37.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.238 [mindspore/train/serialization.py:1322] transformer.layers.38.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.285 [mindspore/train/serialization.py:1322] transformer.layers.38.attention.kvcache_mgr.value_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.333 [mindspore/train/serialization.py:1322] transformer.layers.39.attention.kvcache_mgr.key_past is not loaded. | |
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.380 [mindspore/train/serialization.py:1322] transformer.layers.39.attention.kvcache_mgr.value_past is not loaded. | |
2023-12-30 00:20:33,315 - mindformers[mindformers/trainer/utils.py:607] - INFO - Network parameters are not loaded: (['transformer.layers.0.attention.kvcache_mgr.key_past', 'transformer.layers.0.attention.kvcache_mgr.value_past', 'transformer.layers.1.attention.kvcache_mgr.key_past', 'transformer.layers.1.attention.kvcache_mgr.value_past', 'transformer.layers.2.attention.kvcache_mgr.key_past', 'transformer.layers.2.attention.kvcache_mgr.value_past', 'transformer.layers.3.attention.kvcache_mgr.key_past', 'transformer.layers.3.attention.kvcache_mgr.value_past', 'transformer.layers.4.attention.kvcache_mgr.key_past', 'transformer.layers.4.attention.kvcache_mgr.value_past', 'transformer.layers.5.attention.kvcache_mgr.key_past', 'transformer.layers.5.attention.kvcache_mgr.value_past', 'transformer.layers.6.attention.kvcache_mgr.key_past', 'transformer.layers.6.attention.kvcache_mgr.value_past', 'transformer.layers.7.attention.kvcache_mgr.key_past', 'transformer.layers.7.attention.kvcache_mgr.value_past', 'transformer.layers.8.attention.kvcache_mgr.key_past', 'transformer.layers.8.attention.kvcache_mgr.value_past', 'transformer.layers.9.attention.kvcache_mgr.key_past', 'transformer.layers.9.attention.kvcache_mgr.value_past', 'transformer.layers.10.attention.kvcache_mgr.key_past', 'transformer.layers.10.attention.kvcache_mgr.value_past', 'transformer.layers.11.attention.kvcache_mgr.key_past', 'transformer.layers.11.attention.kvcache_mgr.value_past', 'transformer.layers.12.attention.kvcache_mgr.key_past', 'transformer.layers.12.attention.kvcache_mgr.value_past', 'transformer.layers.13.attention.kvcache_mgr.key_past', 'transformer.layers.13.attention.kvcache_mgr.value_past', 'transformer.layers.14.attention.kvcache_mgr.key_past', 'transformer.layers.14.attention.kvcache_mgr.value_past', 'transformer.layers.15.attention.kvcache_mgr.key_past', 'transformer.layers.15.attention.kvcache_mgr.value_past', 'transformer.layers.16.attention.kvcache_mgr.key_past', 'transformer.layers.16.attention.kvcache_mgr.value_past', 'transformer.layers.17.attention.kvcache_mgr.key_past', 'transformer.layers.17.attention.kvcache_mgr.value_past', 'transformer.layers.18.attention.kvcache_mgr.key_past', 'transformer.layers.18.attention.kvcache_mgr.value_past', 'transformer.layers.19.attention.kvcache_mgr.key_past', 'transformer.layers.19.attention.kvcache_mgr.value_past', 'transformer.layers.20.attention.kvcache_mgr.key_past', 'transformer.layers.20.attention.kvcache_mgr.value_past', 'transformer.layers.21.attention.kvcache_mgr.key_past', 'transformer.layers.21.attention.kvcache_mgr.value_past', 'transformer.layers.22.attention.kvcache_mgr.key_past', 'transformer.layers.22.attention.kvcache_mgr.value_past', 'transformer.layers.23.attention.kvcache_mgr.key_past', 'transformer.layers.23.attention.kvcache_mgr.value_past', 'transformer.layers.24.attention.kvcache_mgr.key_past', 'transformer.layers.24.attention.kvcache_mgr.value_past', 'transformer.layers.25.attention.kvcache_mgr.key_past', 'transformer.layers.25.attention.kvcache_mgr.value_past', 'transformer.layers.26.attention.kvcache_mgr.key_past', 'transformer.layers.26.attention.kvcache_mgr.value_past', 'transformer.layers.27.attention.kvcache_mgr.key_past', 'transformer.layers.27.attention.kvcache_mgr.value_past', 'transformer.layers.28.attention.kvcache_mgr.key_past', 'transformer.layers.28.attention.kvcache_mgr.value_past', 'transformer.layers.29.attention.kvcache_mgr.key_past', 'transformer.layers.29.attention.kvcache_mgr.value_past', 'transformer.layers.30.attention.kvcache_mgr.key_past', 'transformer.layers.30.attention.kvcache_mgr.value_past', 'transformer.layers.31.attention.kvcache_mgr.key_past', 'transformer.layers.31.attention.kvcache_mgr.value_past', 'transformer.layers.32.attention.kvcache_mgr.key_past', 'transformer.layers.32.attention.kvcache_mgr.value_past', 'transformer.layers.33.attention.kvcache_mgr.key_past', 'transformer.layers.33.attention.kvcache_mgr.value_past', 'transformer.layers.34.attention.kvcache_mgr.key_past', 'transformer.layers.34.attention.kvcache_mgr.value_past', 'transformer.layers.35.attention.kvcache_mgr.key_past', 'transformer.layers.35.attention.kvcache_mgr.value_past', 'transformer.layers.36.attention.kvcache_mgr.key_past', 'transformer.layers.36.attention.kvcache_mgr.value_past', 'transformer.layers.37.attention.kvcache_mgr.key_past', 'transformer.layers.37.attention.kvcache_mgr.value_past', 'transformer.layers.38.attention.kvcache_mgr.key_past', 'transformer.layers.38.attention.kvcache_mgr.value_past', 'transformer.layers.39.attention.kvcache_mgr.key_past', 'transformer.layers.39.attention.kvcache_mgr.value_past'], []) | |
{'auto_trans_ckpt': False, | |
'context': {'ascend_config': {'precision_mode': 'must_keep_origin_dtype'}, | |
'device_id': 0, | |
'device_target': 'Ascend', | |
'enable_graph_kernel': False, | |
'graph_kernel_flags': '--disable_expand_ops=Softmax,Dropout ' | |
'--enable_parallel_fusion=true ' | |
'--reduce_fuse_depth=8 ' | |
'--enable_auto_tensor_inplace=true', | |
'max_call_depth': 10000, | |
'save_graphs': False, | |
'save_graphs_path': './graph'}, | |
'device_num': 4, | |
'infer': {'increment_model_path': '/path/qwen_7b_inc.mindir', | |
'infer_seq_length': 1024, | |
'prefill_model_path': '/path/qwen_7b_prefill.mindir'}, | |
'load_checkpoint': '/data/modelscope/Qwen-14B-Chat-ms-parallel4', | |
'local_rank': 0, | |
'micro_batch_interleave_num': 1, | |
'model': {'arch': {'type': 'QwenForCausalLM'}, | |
'model_config': {'batch_size': 1, | |
'checkpoint_name_or_path': None, | |
'compute_dtype': 'float16', | |
'do_sample': False, | |
'emb_dropout_prob': 0.0, | |
'eos_token_id': 151643, | |
'hidden_size': 5120, | |
'intermediate_size': 13696, | |
'kv_channels': 128, | |
'layernorm_compute_type': 'float32', | |
'max_decode_length': 512, | |
'num_attention_heads': 40, | |
'num_hidden_layers': 40, | |
'offset': 0, | |
'pad_token_id': 151643, | |
'param_init_type': 'float16', | |
'repetition_penalty': 1, | |
'rms_norm_eps': 1e-06, | |
'rotary_dtype': 'float16', | |
'rotary_emb_base': 10000, | |
'rotary_pct': 1.0, | |
'seq_length': 8192, | |
'softmax_compute_type': 'float16', | |
'top_k': 0, | |
'top_p': 0.8, | |
'type': 'QwenConfig', | |
'use_past': True, | |
'use_past_shard': False, | |
'vocab_size': 152064}}, | |
'moe_config': <mindformers.modules.transformer.moe.MoEConfig object at 0xffff32a35250>, | |
'only_save_strategy': False, | |
'output_dir': './output', | |
'parallel': {'device_num': 4, | |
'enable_alltoall': False, | |
'enable_parallel_optimizer': True, | |
'full_batch': True, | |
'gradients_mean': False, | |
'parallel_mode': 'semi_auto_parallel', | |
'parallel_optimizer_config': {'gradient_accumulation_shard': False, | |
'parallel_optimizer_threshold': 64}, | |
'search_mode': 'sharding_propagation', | |
'strategy_ckpt_save_file': './output/strategy/./ckpt_strategy_rank_0.ckpt'}, | |
'parallel_config': <mindformers.modules.transformer.transformer.TransformerOpParallelConfig object at 0xfffea4364250>, | |
'processor': {'return_tensors': 'ms', | |
'tokenizer': {'model_max_length': 8192, | |
'pad_token': '<|endoftext|>', | |
'type': 'QwenTokenizer', | |
'vocab_file': '/data/modelscope/Qwen-14B-Chat/qwen.tiktoken'}, | |
'type': 'QwenProcessor'}, | |
'rank_id': 0, | |
'recompute_config': <mindformers.modules.transformer.transformer.TransformerRecomputeConfig object at 0xfffea465d8e0>, | |
'resume_training': False, | |
'run_mode': 'predict', | |
'runner_config': {'batch_size': 1, | |
'epochs': 1, | |
'gradient_accumulation_steps': 1, | |
'sink_mode': True, | |
'sink_size': 2}, | |
'seed': 0, | |
'src_strategy_path_or_dir': '', | |
'trainer': {'model_name': 'qwen_14b', 'type': 'CausalLanguageModelingTrainer'}, | |
'use_parallel': True} | |
2023-12-30 00:20:33,534 - mindformers[mindformers/generation/text_generator.py:1097] - INFO - Generation Config is: {'max_length': 2048, 'max_new_tokens': None, 'num_beams': 1, 'do_sample': False, 'use_past': True, 'temperature': 1.0, 'top_k': 0, 'top_p': 1.0, 'repetition_penalty': 1, 'encoder_repetition_penalty': 1.0, 'renormalize_logits': False, 'pad_token_id': 151643, 'bos_token_id': 1, 'eos_token_id': 151643, '_from_model_config': True} | |
2023-12-30 00:20:33,536 - mindformers[mindformers/generation/text_generator.py:176] - INFO - The generation mode will be **GREEDY_SEARCH**. | |
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.082 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 1 is not a tensor. | |
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.167 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 3 is not a tensor. | |
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.211 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 4 is not a tensor. | |
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.239 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 5 is not a tensor. | |
[WARNING] PRE_ACT(87237,ffffb3047930,python):2023-12-30-00:22:04.336.849 [mindspore/ccsrc/backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.cc:84] IncreaseAllgatherFusionId] Increase the duplicated allgather fusion id | |
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.827.953 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 1 is not a tensor. | |
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.091 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 3 is not a tensor. | |
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.136 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 4 is not a tensor. | |
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.166 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 5 is not a tensor. | |
[ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.723 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:143] CheckStrategyByVector] GatherInfo19941994: The strategy is ((1, 1), (1)), strategy len: 1 is not equal to inputs len: 2, index: 1 | |
[ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.833 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:962] InitForCostModelWithAutoRepeatCalc] GatherInfo19941994: CheckStrategy failed. | |
[ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.865 [mindspore/ccsrc/frontend/parallel/ops_info/gather_info.cc:1255] Init] GatherInfo19941994: Init failed. | |
Traceback (most recent call last): | |
File "/data/test1229/mindformers/research/qwen/run_qwen.py", line 165, in <module> | |
main(task=args.task, | |
File "/data/test1229/mindformers/research/qwen/run_qwen.py", line 113, in main | |
result = task.predict(input_data=prompt, | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/_checkparam.py", line 1313, in wrapper | |
return func(*args, **kwargs) | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/trainer.py", line 659, in predict | |
output_result = self.trainer.predict( | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/causal_language_modeling/causal_language_modeling.py", line 315, in predict | |
return self.predict_process(config=config, | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/base_trainer.py", line 878, in predict_process | |
output_results = self.pipeline_task(input_data, top_k=top_k) | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/base_pipeline.py", line 123, in __call__ | |
outputs = self.run_single(inputs, preprocess_params, forward_params, postprocess_params) | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/base_pipeline.py", line 170, in run_single | |
model_outputs = self.forward(model_inputs, **forward_params) | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/text_generation_pipeline.py", line 180, in forward | |
output_ids = self.network.generate(input_ids, **forward_params) | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 1114, in generate | |
output_ids = self._greedy_search( | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 394, in _greedy_search | |
res = self._incremental_infer( | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 238, in _incremental_infer | |
res = self( | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 680, in __call__ | |
out = self.compile_and_run(*args, **kwargs) | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 1020, in compile_and_run | |
self.compile(*args, **kwargs) | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 997, in compile | |
_cell_graph_executor.compile(self, phase=self.phase, | |
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/common/api.py", line 1547, in compile | |
result = self._graph_executor.compile(obj, args, kwargs, phase, self._use_vm_mode()) | |
RuntimeError: Failure:operator Gather init failed | |
---------------------------------------------------- | |
- The Function Call Stack: (For framework developers) | |
---------------------------------------------------- | |
In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_layer.py:171/ freqs_cos = self.reshape(self.gather(self.freqs_cos, batch_valid_length, 0), (batch_size, 1, 1, self.head_dim))/ | |
In file /data/test1229/mindformers/research/qwen/qwen_model.py:294/ freqs_cis = self.freqs_mgr.increment(batch_valid_length, bs)/ | |
In file /data/test1229/mindformers/research/qwen/qwen_model.py:290/ if self.is_first_iteration:/ | |
In file /data/test1229/mindformers/research/qwen/qwen_model.py:284/ if not self.use_past:/ | |
In file /data/test1229/mindformers/research/qwen/qwen_model.py:125/ output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)/ | |
In file /data/test1229/mindformers/research/qwen/qwen_model.py:123/ tokens = input_ids/ | |
In file /data/test1229/mindformers/research/qwen/qwen_model.py:120/ if self.training:/ | |
In file /data/test1229/mindformers/research/qwen/qwen_model.py:116/ if not isinstance(init_reset, Tensor):/ | |
In file /data/test1229/mindformers/research/qwen/qwen_model.py:111/ def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None,/ | |
---------------------------------------------------- | |
- C++ Call Stack: (For framework developers) | |
---------------------------------------------------- | |
mindspore/ccsrc/frontend/parallel/step_parallel.cc:1655 ExtractStrategyAndInit | |
---------------------------------------------------- | |
- The Traceback of Net Construct Code: | |
---------------------------------------------------- | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:111 | |
def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None, | |
^ | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:116 | |
if not isinstance(init_reset, Tensor): | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:120 | |
if self.training: | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:123 | |
tokens = input_ids | |
^ | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:125 | |
output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length) | |
^ | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:125 | |
output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length) | |
^ | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:125 | |
output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length) | |
^ | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:284 | |
if not self.use_past: | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:290 | |
if self.is_first_iteration: | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:295 | |
if self.is_dynamic and self.is_flexible_shape and not self.use_kvcache_op: | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:300 | |
mask = self.casual_mask.increment(self.kvcache_preprocess.range, batch_valid_length) | |
^ | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:294 | |
freqs_cis = self.freqs_mgr.increment(batch_valid_length, bs) | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:290 | |
if self.is_first_iteration: | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:125 | |
output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length) | |
^ | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:306 | |
for i in range(self.num_hidden_layers): | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:307 | |
hidden_states = self.layers[i](hidden_states, freqs_cis, mask, kvcache_inputs=kvcache_inputs) | |
^ | |
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:307 | |
hidden_states = self.layers[i](hidden_states, freqs_cis, mask, kvcache_inputs=kvcache_inputs) | |
^ | |
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_transformer.py:489 | |
h = self.attention(input_x, freqs_cis, mask, kvcache_inputs) | |
^ | |
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_transformer.py:245 | |
query = self.cast(self.wq(x), self.dtype) # dp, 1 -> dp, mp | |
^ | |
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:487 | |
if self.expert_flag: | |
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:487 | |
if self.expert_flag: | |
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:495 | |
x = self.matmul(x, weight) | |
^ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment