qianwch · December 30, 2023 05:48
diff --git a/gistfile1.txt b/gistfile1.txt
 /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero.
  setattr(self, word, getattr(machar, word).flat[0])
 /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
  setattr(self, word, getattr(machar, word).flat[0])
 /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/jieba/_compat.py:18: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html
  import pkg_resources
 [WARNING] Distributed Communication has not been inited. Use default RANK_SIZE: 1
 [WARNING] Distributed Communication has not been inited. Use default RANK_SIZE: 1
 [WARNING] Distributed Communication has not been inited. Use default RANK_ID: 0
 [WARNING] Distributed Communication has not been inited. Use default RANK_ID: 0
 [WARNING] HCCL_ADPT(87237,ffffb3047930,python):2023-12-30-00:14:29.436.847 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:63] GenHcclOptions] The environment variable DEPLOY_MODE is not set. Now set to default value 0
 2023-12-30 00:14:29,672 - mindformers[mindformers/tools/utils.py:153] - INFO - set output path to './output'
 2023-12-30 00:14:29,673 - mindformers[mindformers/trainer/trainer.py:176] - INFO - set output_dir from args:dict
 2023-12-30 00:14:29,701 - mindformers[mindformers/trainer/base_trainer.py:85] - INFO - Now Running Task is: text_generation, Model is: qwen_14b
 2023-12-30 00:14:29,701 - mindformers[mindformers/trainer/base_trainer.py:126] - WARNING - Input model name is not in the supported list or unspecified.
 2023-12-30 00:14:29,702 - mindformers[mindformers/trainer/base_trainer.py:127] - WARNING - See the list of supported task and model name: OrderedDict([('general', OrderedDict([('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/general/run_general_task.yaml')])), ('masked_image_modeling', OrderedDict([('mae_vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/mae/run_mae_vit_base_p16_224_800ep.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/mae/run_mae_vit_base_p16_224_800ep.yaml')])), ('image_classification', OrderedDict([('vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml'), ('swin_base_p4w7', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/swin/run_swin_base_p4w7_224_100ep.yaml'), ('mindspore/vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml'), ('mindspore/swin_base_p4w7', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/swin/run_swin_base_p4w7_224_100ep.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml')])), ('fill_mask', OrderedDict([('bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_base_uncased.yaml'), ('bert_tiny_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_tiny_uncased.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_tiny_uncased.yaml')])), ('contrastive_language_image_pretrain', OrderedDict([('clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml'), ('blip2_stage1_vit_g', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_qformer_pretrain.yaml'), ('blip2_stage2_vit_g_baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_baichuan_7b.yaml'), ('blip2_stage2_vit_g_llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_llama_7b.yaml'), ('mindspore/clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml'), ('clip_vit_b_16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_16_pretrain_flickr8k.yaml'), ('clip_vit_l_14', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14_pretrain_flickr8k.yaml'), ('clip_vit_l_14@336', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14@336_pretrain_flickr8k.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml')])), ('image_to_text_retrieval', OrderedDict([('blip2_stage1_evaluator', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_retrieval_flickr30k.yaml')])), ('zero_shot_image_classification', OrderedDict([('clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml'), ('mindspore/clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_b_16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_16_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_l_14', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_l_14@336', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14@336_zero_shot_image_classification_cifar100.yaml'), ('blip2_stage1_classification', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_zero_shot_image_classification_cifar100.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml')])), ('image_to_text_generation', OrderedDict([('itt_blip2_stage2_vit_g_baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_baichuan_7b_image_to_text_generation.yaml'), ('itt_blip2_stage2_vit_g_llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_llama_7b_image_to_text_generation.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_blip2_stage2_vit_g_llama_7b_image_to_text_generation.yaml')])), ('translation', OrderedDict([('t5_small', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_small_on_wmt16.yaml'), ('t5_tiny', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_tiny_on_wmt16.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_small_on_wmt16.yaml')])), ('text_classification', OrderedDict([('txtcls_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased.yaml'), ('txtcls_bert_base_uncased_mnli', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml'), ('mindspore/txtcls_bert_base_uncased_mnli', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml'), ('gpt2_txtcls', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_txtcls.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased.yaml')])), ('token_classification', OrderedDict([('tokcls_bert_base_chinese', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese.yaml'), ('tokcls_bert_base_chinese_cluener', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese_cluener.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese.yaml')])), ('question_answering', OrderedDict([('qa_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('qa_bert_base_uncased_squad', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('mindspore/qa_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml')])), ('text_generation', OrderedDict([('gpt2', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml'), ('gpt2_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_lora.yaml'), ('gpt2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_13b.yaml'), ('gpt2_52b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_52b.yaml'), ('gpt2_xl', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_xl.yaml'), ('gpt2_xl_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_xl_lora.yaml'), ('llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_7b.yaml'), ('llama_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_13b.yaml'), ('llama_65b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_65b.yaml'), ('llama2_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_7b.yaml'), ('llama2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_13b.yaml'), ('llama2_70b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_70b.yaml'), ('codellama_34b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/codellama/run_codellama_34b_910b.yaml'), ('llama_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_7b_lora.yaml'), ('pangualpha_2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/pangualpha/run_pangualpha_2_6b.yaml'), ('pangualpha_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/pangualpha/run_pangualpha_13b.yaml'), ('glm_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_finetune.yaml'), ('glm_6b_chat', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_infer.yaml'), ('glm_6b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_lora.yaml'), ('glm_6b_lora_chat', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_lora_infer.yaml'), ('glm2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b.yaml'), ('glm2_6b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b_lora.yaml'), ('glm2_6b_ptuning2', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b_ptuning2.yaml'), ('glm3_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm3/run_glm3_6b.yaml'), ('codegeex2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/codegeex2/run_codegeex2_6b.yaml'), ('bloom_560m', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_560m.yaml'), ('bloom_7.1b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_7.1b.yaml'), ('bloom_65b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_65b.yaml'), ('bloom_176b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_176b.yaml'), ('baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan/run_baichuan_7b.yaml'), ('baichuan2_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan2/run_baichuan2_7b.yaml'), ('baichuan2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan2/run_baichuan2_13b.yaml'), ('ziya_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/ziya/run_ziya_13b.yaml'), ('skywork_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/skywork/run_skywork_13b.yaml'), ('internlm_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/internlm/run_internlm_7b.yaml'), ('internlm_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/internlm/run_internlm_7b_lora.yaml'), ('qwen_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/qwen/run_qwen_7b.yaml'), ('qwen_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/qwen/run_qwen_7b_lora.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml')])), ('segment_anything', OrderedDict([('sam_vit_b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-b.yaml'), ('sam_vit_l', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-l.yaml'), ('sam_vit_h', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-h.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-h.yaml')]))])
 2023-12-30 00:14:29,702 - mindformers[mindformers/trainer/base_trainer.py:128] - WARNING - The default model config: /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml will now be used for the text_generation task 
 2023-12-30 00:14:29,703 - mindformers[mindformers/core/parallel_config.py:51] - INFO - initial parallel_config from dict: {'data_parallel': 1, 'model_parallel': 4, 'pipeline_stage': 1, 'use_seq_parallel': False, 'micro_batch_num': 4, 'vocab_emb_dp': True, 'gradient_aggregation_group': 4}
 2023-12-30 00:14:29,703 - mindformers[mindformers/trainer/base_trainer.py:191] - INFO - The current parallel mode is semi_auto_parallel, full batch is True,so global batch size will be changed: global_batch_size = batch_size * data_parallel * micro_batch_interleave_num * gradient_accumulation_steps = 1 = 1 * 1 * 1 * 1
 2023-12-30 00:14:29,703 - mindformers[mindformers/trainer/base_trainer.py:371] - INFO - .........Build Network From Config..........
 2023-12-30 00:14:29,704 - mindformers[mindformers/models/llama/llama_config.py:177] - WARNING - Argument `use_past_shard` is deprecated.
 2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:60] - INFO - The Cell Reuse compilation acceleration feature is not supported when the environment variable ENABLE_CELL_REUSE is 0 or MindSpore version is earlier than 2.1.0 or stand_alone mode or pipeline_stages <= 1
 2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:64] - INFO - 
 The current ENABLE_CELL_REUSE=0, please set the environment variable as follows: 
 export ENABLE_CELL_REUSE=1 to enable the Cell Reuse compilation acceleration feature.
 2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:73] - INFO - The Cell Reuse compilation acceleration feature only works in pipeline parallel mode(pipeline_stage>1).Current pipeline stage=1, the feature is disabled by default.
 2023-12-30 00:14:29,710 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:29,712 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:30,357 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:30,362 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:30,364 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:14:30.369.919 [mindspore/common/parameter.py:786] This interface may be deleted in the future.
 2023-12-30 00:14:30,372 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:30,373 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:31,018 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:31,027 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:31,029 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:31,037 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:31,039 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:31,680 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:31,684 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:31,686 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:31,693 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:31,695 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:32,337 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:32,340 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:32,342 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:32,350 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:32,351 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:32,992 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:32,996 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:32,998 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:33,005 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:33,007 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:33,647 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:33,651 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:33,653 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:33,660 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:33,662 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:34,305 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:34,309 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:34,311 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:34,318 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:34,320 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:34,961 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:34,965 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:34,966 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:34,974 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:34,975 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:35,617 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:35,621 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:35,623 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:35,630 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:35,632 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:36,271 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:36,275 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:36,277 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:36,284 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:36,286 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:36,926 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:36,930 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:36,932 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:36,940 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:36,941 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:37,582 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:37,585 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:37,587 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:37,595 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:37,596 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:38,238 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:38,242 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:38,244 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:38,252 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:38,253 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:38,900 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:38,904 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:38,906 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:38,913 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:38,915 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:39,560 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:39,564 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:39,566 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:39,573 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:39,575 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:40,230 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:40,234 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:40,236 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:40,245 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:40,247 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:40,892 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:40,896 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:40,898 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:40,905 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:40,907 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:41,553 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:41,557 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:41,559 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:41,566 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:41,567 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:42,209 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:42,213 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:42,215 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:42,222 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:42,223 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:42,869 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:42,873 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:42,875 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:42,882 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:42,884 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:43,529 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:43,533 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:43,535 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:43,542 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:43,544 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:44,189 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:44,193 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:44,195 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:44,202 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:44,204 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:44,849 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:44,853 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:44,855 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:44,862 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:44,864 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:45,505 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:45,514 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:45,516 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:45,524 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:45,525 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:46,166 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:46,170 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:46,172 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:46,179 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:46,180 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:46,825 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:46,829 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:46,831 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:46,838 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:46,840 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:47,486 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:47,489 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:47,491 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:47,498 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:47,500 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:48,145 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:48,148 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:48,150 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:48,158 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:48,159 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:48,811 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:48,815 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:48,817 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:48,824 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:48,826 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:49,466 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:49,470 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:49,472 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:49,479 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:49,481 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:50,126 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:50,130 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:50,132 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:50,141 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:50,142 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:50,796 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:50,800 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:50,802 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:50,809 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:50,811 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:51,461 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:51,465 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:51,467 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:51,474 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:51,476 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:52,117 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:52,121 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:52,123 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:52,130 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:52,131 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:52,777 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:52,781 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:52,782 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:52,790 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:52,791 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:53,438 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:53,441 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:53,443 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:53,451 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:53,452 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:54,094 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:54,098 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:54,100 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:54,107 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:54,109 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:54,756 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:54,760 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:54,762 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:54,769 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:54,771 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:55,420 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:55,424 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:55,425 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:55,433 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:55,434 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:56,077 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:56,081 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
 2023-12-30 00:14:56,083 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:57,063 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
 2023-12-30 00:14:57,078 - mindformers[mindformers/models/base_model.py:117] - INFO - model built, but weights is unloaded, since the config has no checkpoint_name_or_path attribute or checkpoint_name_or_path is None.
 2023-12-30 00:14:57,093 - mindformers[mindformers/trainer/base_trainer.py:515] - INFO - Network Parameters: 14167 M.
 [WARNING] DEVICE(87237,ffffb3047930,python):2023-12-30-00:14:58.432.866 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:103] Initialize] Reserved memory size for other components(1073741824) is less than recommend size(2145292800), It may lead to Out Of Memory in HCCL or other components, Please double check context key 'variable_memory_max_size'/'max_device_memory'
 /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero.
  setattr(self, word, getattr(machar, word).flat[0])
 /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
  setattr(self, word, getattr(machar, word).flat[0])
 [WARNING] PRE_ACT(87237,ffffb3047930,python):2023-12-30-00:16:38.750.638 [mindspore/ccsrc/backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.cc:84] IncreaseAllgatherFusionId] Increase the duplicated allgather fusion id
 2023-12-30 00:17:49,800 - mindformers[mindformers/trainer/utils.py:596] - INFO - .............Start load checkpoint from checkpoint..................
 2023-12-30 00:17:49,801 - mindformers[mindformers/trainer/utils.py:245] - INFO - When distributed loads are sliced weights,load_checkpoint should be a checkpoint directory containing the directory of rank_{0-*},The directory structure is as follows: **checkpoint_root_dir/rank_{0-*}/**.ckpt
 2023-12-30 00:19:26,982 - mindformers[mindformers/trainer/utils.py:258] - INFO - Distribute load is success.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:29.643.743 [mindspore/train/serialization.py:172] The type of transformer.wte.embedding_weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.611.345 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.814.382 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.865.933 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.210.12 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.116.387 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.206.440 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.302.318 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.534.612 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.565.13 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.558.620 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.906.088 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.112.506 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.210.534 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.304.827 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.397.993 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.508.718 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.604.662 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.886.504 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:36.427.475 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.186.58 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.385.227 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.598.188 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.687.603 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.802.418 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.897.706 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.916. [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.935.86 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.389.393 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.992.544 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:39.532.962 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:39.876.857 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.685.51 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.159.963 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.280.464 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.384.605 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.416.777 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.508.013 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.606.761 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.960.073 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.315.835 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.621.338 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.748.162 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.854.197 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.886.329 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.978.509 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.107.24 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.103.064 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.190.918 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.544.984 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.941.873 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.233.123 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.355.428 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.445.002 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.476.827 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.562.141 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.594.368 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.686.504 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.776.271 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.112.113 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.466.775 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.767.148 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.886.560 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.977.884 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.291.9 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.913.29 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.122.321 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.214.277 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.284.537 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.619.414 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.956.637 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.242.113 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.377.666 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.471.024 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.501.071 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.593.557 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.620.175 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.710.899 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.793.061 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.143.461 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.473.350 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.628.073 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.753.922 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.846.836 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.875.535 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.967.234 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.999.337 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.890.48 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.165.084 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.681.108 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.976.043 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.317.928 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.479.756 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.553.552 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.626.140 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.686.796 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.766.473 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.827.273 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.909.200 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.279.948 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.646.072 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.943.834 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.680.35 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.162.104 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.198.512 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.302.291 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.336.603 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.426.555 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.515.263 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.876.965 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.235.903 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.496.045 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.578.253 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.644.899 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.676.079 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.767.318 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.790.577 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.852.412 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.909.901 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.117.267 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.306.410 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.471.388 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.555.691 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.622.197 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.640.009 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.694.637 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.734.799 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.823.553 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.904.138 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.132.502 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.475.075 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.837.669 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.973.434 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.637.53 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.115.734 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.167.208 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.207.468 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.300.499 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.382.921 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.592.604 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.955.980 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.304.945 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.463.094 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.524.711 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.564.815 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.660.107 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.692.217 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.757.487 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.474.1 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.364.427 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.663.272 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.919.954 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.964.04 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.163.756 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.203.870 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.293.580 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.331.781 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.423.184 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.504.562 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.795.691 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.387.846 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.634.479 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.758.031 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.816.496 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.855.170 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.946.318 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.558.38 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.116.309 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.210.795 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.513.187 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.726.708 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.961.412 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.112.011 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.177.320 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.222.916 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.312.605 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.349.532 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.412.024 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.506.050 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.748.346 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.973.622 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.128.179 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.229.669 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.302.768 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.345.515 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.406.901 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.449.507 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.536.945 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.619.002 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.827.554 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.911.18 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.325.664 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.451.490 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.544.533 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.576.664 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.654.106 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.688.260 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.780.785 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.877.374 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.202.281 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.540.706 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.753.268 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.850.076 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.911.505 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.951.761 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.479.51 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.808.51 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.144.028 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.227.934 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.507.191 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.741.909 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.913.295 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.151.02 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.110.182 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.143.177 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.207.493 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.240.738 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.310.663 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.394.765 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.622.853 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.852.884 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.108.13 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.113.861 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.202.209 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.235.192 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.294.596 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.344.243 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.438.072 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.545.969 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.783.346 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.710.9 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.163.652 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.293.650 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.397.280 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.430.064 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.486.836 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.519.502 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.581.809 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.693.016 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.936.416 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.163.681 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.320.441 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.449.952 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.543.720 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.588.307 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.683.133 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.715.993 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.778.900 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.891.674 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.117.625 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.356.864 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.629.171 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.753.828 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.844.225 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.881.703 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.968.741 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.408. [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.680.79 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.154.774 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.497.490 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.847.382 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.138.687 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.269.221 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.360.666 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.400.769 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.493.261 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.528.398 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.621.801 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.710.419 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.638.50 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.445.030 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.701.809 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.826.937 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.904.706 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.944.969 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.344.63 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.699.51 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.158.195 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.264.919 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.485.471 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.705.548 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.867.368 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.993.167 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.834.85 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.130.802 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.220.610 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.259.697 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.352.288 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.455.665 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.691.411 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.925.196 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.939.77 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.224.727 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.315.400 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.347.918 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.401.128 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.450.668 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.543.591 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.646.280 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.846.158 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.331.09 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.184.317 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.300.414 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.389.959 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.429.820 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.520.308 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.567.747 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.657.636 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.728.735 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.928.203 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.114.830 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.277.994 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.404.092 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.580.551 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.607.950 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.704.398 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.740.571 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.829.036 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.905.601 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.272.387 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.625.457 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.923.697 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.301.02 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.885.52 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.123.411 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.212.266 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.241.343 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.332.399 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.407.361 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.715.249 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.914.293 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.674.88 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.188.920 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.268.550 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.300.973 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.385.320 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.419.537 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.506.842 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.585.341 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.769.195 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.967.155 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.122.703 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.233.893 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.319.927 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.354.878 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.458.756 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.485.193 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.572.608 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.640.210 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.840.375 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.256.54 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.172.301 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.307.490 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.395.371 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.430.829 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.515.615 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.548.733 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.635.346 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.702.520 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.901.627 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.903.65 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.246.005 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.360.536 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.419.082 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.455.698 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.540.870 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.570.835 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.665.868 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.733.669 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.922.977 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.111.864 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.263.407 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.414.406 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.501.113 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.533.688 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.616.836 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.640.137 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.795.118 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.926.389 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.303.492 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.664.528 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.809.632 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.927.201 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.211.40 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.514.75 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.141.832 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.163.909 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.226.285 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.387.562 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.636.010 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.901.214 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.500.45 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.167.980 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.253.882 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.285.260 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.373.861 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.402.797 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.504.672 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.677.928 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.975.418 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:29.212.831 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:30.718.306 [mindspore/train/serialization.py:172] The type of lm_head.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.310.992 [mindspore/train/serialization.py:1317] For 'load_param_into_net', 80 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.369 [mindspore/train/serialization.py:1322] transformer.layers.0.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.462 [mindspore/train/serialization.py:1322] transformer.layers.0.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.530 [mindspore/train/serialization.py:1322] transformer.layers.1.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.616 [mindspore/train/serialization.py:1322] transformer.layers.1.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.673 [mindspore/train/serialization.py:1322] transformer.layers.2.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.727 [mindspore/train/serialization.py:1322] transformer.layers.2.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.782 [mindspore/train/serialization.py:1322] transformer.layers.3.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.834 [mindspore/train/serialization.py:1322] transformer.layers.3.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.886 [mindspore/train/serialization.py:1322] transformer.layers.4.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.939 [mindspore/train/serialization.py:1322] transformer.layers.4.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.993 [mindspore/train/serialization.py:1322] transformer.layers.5.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.044 [mindspore/train/serialization.py:1322] transformer.layers.5.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.114 [mindspore/train/serialization.py:1322] transformer.layers.6.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.167 [mindspore/train/serialization.py:1322] transformer.layers.6.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.218 [mindspore/train/serialization.py:1322] transformer.layers.7.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.269 [mindspore/train/serialization.py:1322] transformer.layers.7.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.320 [mindspore/train/serialization.py:1322] transformer.layers.8.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.370 [mindspore/train/serialization.py:1322] transformer.layers.8.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.418 [mindspore/train/serialization.py:1322] transformer.layers.9.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.467 [mindspore/train/serialization.py:1322] transformer.layers.9.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.515 [mindspore/train/serialization.py:1322] transformer.layers.10.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.564 [mindspore/train/serialization.py:1322] transformer.layers.10.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.612 [mindspore/train/serialization.py:1322] transformer.layers.11.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.660 [mindspore/train/serialization.py:1322] transformer.layers.11.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.709 [mindspore/train/serialization.py:1322] transformer.layers.12.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.768 [mindspore/train/serialization.py:1322] transformer.layers.12.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.821 [mindspore/train/serialization.py:1322] transformer.layers.13.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.871 [mindspore/train/serialization.py:1322] transformer.layers.13.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.920 [mindspore/train/serialization.py:1322] transformer.layers.14.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.969 [mindspore/train/serialization.py:1322] transformer.layers.14.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.017 [mindspore/train/serialization.py:1322] transformer.layers.15.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.065 [mindspore/train/serialization.py:1322] transformer.layers.15.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.113 [mindspore/train/serialization.py:1322] transformer.layers.16.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.161 [mindspore/train/serialization.py:1322] transformer.layers.16.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.209 [mindspore/train/serialization.py:1322] transformer.layers.17.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.256 [mindspore/train/serialization.py:1322] transformer.layers.17.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.304 [mindspore/train/serialization.py:1322] transformer.layers.18.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.351 [mindspore/train/serialization.py:1322] transformer.layers.18.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.398 [mindspore/train/serialization.py:1322] transformer.layers.19.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.445 [mindspore/train/serialization.py:1322] transformer.layers.19.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.494 [mindspore/train/serialization.py:1322] transformer.layers.20.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.542 [mindspore/train/serialization.py:1322] transformer.layers.20.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.591 [mindspore/train/serialization.py:1322] transformer.layers.21.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.639 [mindspore/train/serialization.py:1322] transformer.layers.21.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.687 [mindspore/train/serialization.py:1322] transformer.layers.22.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.734 [mindspore/train/serialization.py:1322] transformer.layers.22.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.782 [mindspore/train/serialization.py:1322] transformer.layers.23.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.831 [mindspore/train/serialization.py:1322] transformer.layers.23.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.887 [mindspore/train/serialization.py:1322] transformer.layers.24.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.937 [mindspore/train/serialization.py:1322] transformer.layers.24.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.986 [mindspore/train/serialization.py:1322] transformer.layers.25.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.034 [mindspore/train/serialization.py:1322] transformer.layers.25.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.083 [mindspore/train/serialization.py:1322] transformer.layers.26.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.131 [mindspore/train/serialization.py:1322] transformer.layers.26.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.179 [mindspore/train/serialization.py:1322] transformer.layers.27.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.226 [mindspore/train/serialization.py:1322] transformer.layers.27.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.273 [mindspore/train/serialization.py:1322] transformer.layers.28.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.321 [mindspore/train/serialization.py:1322] transformer.layers.28.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.368 [mindspore/train/serialization.py:1322] transformer.layers.29.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.416 [mindspore/train/serialization.py:1322] transformer.layers.29.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.463 [mindspore/train/serialization.py:1322] transformer.layers.30.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.511 [mindspore/train/serialization.py:1322] transformer.layers.30.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.558 [mindspore/train/serialization.py:1322] transformer.layers.31.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.605 [mindspore/train/serialization.py:1322] transformer.layers.31.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.653 [mindspore/train/serialization.py:1322] transformer.layers.32.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.699 [mindspore/train/serialization.py:1322] transformer.layers.32.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.746 [mindspore/train/serialization.py:1322] transformer.layers.33.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.793 [mindspore/train/serialization.py:1322] transformer.layers.33.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.839 [mindspore/train/serialization.py:1322] transformer.layers.34.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.886 [mindspore/train/serialization.py:1322] transformer.layers.34.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.940 [mindspore/train/serialization.py:1322] transformer.layers.35.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.992 [mindspore/train/serialization.py:1322] transformer.layers.35.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.043 [mindspore/train/serialization.py:1322] transformer.layers.36.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.093 [mindspore/train/serialization.py:1322] transformer.layers.36.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.141 [mindspore/train/serialization.py:1322] transformer.layers.37.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.190 [mindspore/train/serialization.py:1322] transformer.layers.37.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.238 [mindspore/train/serialization.py:1322] transformer.layers.38.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.285 [mindspore/train/serialization.py:1322] transformer.layers.38.attention.kvcache_mgr.value_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.333 [mindspore/train/serialization.py:1322] transformer.layers.39.attention.kvcache_mgr.key_past is not loaded.
 [WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.380 [mindspore/train/serialization.py:1322] transformer.layers.39.attention.kvcache_mgr.value_past is not loaded.
 2023-12-30 00:20:33,315 - mindformers[mindformers/trainer/utils.py:607] - INFO - Network parameters are not loaded: (['transformer.layers.0.attention.kvcache_mgr.key_past', 'transformer.layers.0.attention.kvcache_mgr.value_past', 'transformer.layers.1.attention.kvcache_mgr.key_past', 'transformer.layers.1.attention.kvcache_mgr.value_past', 'transformer.layers.2.attention.kvcache_mgr.key_past', 'transformer.layers.2.attention.kvcache_mgr.value_past', 'transformer.layers.3.attention.kvcache_mgr.key_past', 'transformer.layers.3.attention.kvcache_mgr.value_past', 'transformer.layers.4.attention.kvcache_mgr.key_past', 'transformer.layers.4.attention.kvcache_mgr.value_past', 'transformer.layers.5.attention.kvcache_mgr.key_past', 'transformer.layers.5.attention.kvcache_mgr.value_past', 'transformer.layers.6.attention.kvcache_mgr.key_past', 'transformer.layers.6.attention.kvcache_mgr.value_past', 'transformer.layers.7.attention.kvcache_mgr.key_past', 'transformer.layers.7.attention.kvcache_mgr.value_past', 'transformer.layers.8.attention.kvcache_mgr.key_past', 'transformer.layers.8.attention.kvcache_mgr.value_past', 'transformer.layers.9.attention.kvcache_mgr.key_past', 'transformer.layers.9.attention.kvcache_mgr.value_past', 'transformer.layers.10.attention.kvcache_mgr.key_past', 'transformer.layers.10.attention.kvcache_mgr.value_past', 'transformer.layers.11.attention.kvcache_mgr.key_past', 'transformer.layers.11.attention.kvcache_mgr.value_past', 'transformer.layers.12.attention.kvcache_mgr.key_past', 'transformer.layers.12.attention.kvcache_mgr.value_past', 'transformer.layers.13.attention.kvcache_mgr.key_past', 'transformer.layers.13.attention.kvcache_mgr.value_past', 'transformer.layers.14.attention.kvcache_mgr.key_past', 'transformer.layers.14.attention.kvcache_mgr.value_past', 'transformer.layers.15.attention.kvcache_mgr.key_past', 'transformer.layers.15.attention.kvcache_mgr.value_past', 'transformer.layers.16.attention.kvcache_mgr.key_past', 'transformer.layers.16.attention.kvcache_mgr.value_past', 'transformer.layers.17.attention.kvcache_mgr.key_past', 'transformer.layers.17.attention.kvcache_mgr.value_past', 'transformer.layers.18.attention.kvcache_mgr.key_past', 'transformer.layers.18.attention.kvcache_mgr.value_past', 'transformer.layers.19.attention.kvcache_mgr.key_past', 'transformer.layers.19.attention.kvcache_mgr.value_past', 'transformer.layers.20.attention.kvcache_mgr.key_past', 'transformer.layers.20.attention.kvcache_mgr.value_past', 'transformer.layers.21.attention.kvcache_mgr.key_past', 'transformer.layers.21.attention.kvcache_mgr.value_past', 'transformer.layers.22.attention.kvcache_mgr.key_past', 'transformer.layers.22.attention.kvcache_mgr.value_past', 'transformer.layers.23.attention.kvcache_mgr.key_past', 'transformer.layers.23.attention.kvcache_mgr.value_past', 'transformer.layers.24.attention.kvcache_mgr.key_past', 'transformer.layers.24.attention.kvcache_mgr.value_past', 'transformer.layers.25.attention.kvcache_mgr.key_past', 'transformer.layers.25.attention.kvcache_mgr.value_past', 'transformer.layers.26.attention.kvcache_mgr.key_past', 'transformer.layers.26.attention.kvcache_mgr.value_past', 'transformer.layers.27.attention.kvcache_mgr.key_past', 'transformer.layers.27.attention.kvcache_mgr.value_past', 'transformer.layers.28.attention.kvcache_mgr.key_past', 'transformer.layers.28.attention.kvcache_mgr.value_past', 'transformer.layers.29.attention.kvcache_mgr.key_past', 'transformer.layers.29.attention.kvcache_mgr.value_past', 'transformer.layers.30.attention.kvcache_mgr.key_past', 'transformer.layers.30.attention.kvcache_mgr.value_past', 'transformer.layers.31.attention.kvcache_mgr.key_past', 'transformer.layers.31.attention.kvcache_mgr.value_past', 'transformer.layers.32.attention.kvcache_mgr.key_past', 'transformer.layers.32.attention.kvcache_mgr.value_past', 'transformer.layers.33.attention.kvcache_mgr.key_past', 'transformer.layers.33.attention.kvcache_mgr.value_past', 'transformer.layers.34.attention.kvcache_mgr.key_past', 'transformer.layers.34.attention.kvcache_mgr.value_past', 'transformer.layers.35.attention.kvcache_mgr.key_past', 'transformer.layers.35.attention.kvcache_mgr.value_past', 'transformer.layers.36.attention.kvcache_mgr.key_past', 'transformer.layers.36.attention.kvcache_mgr.value_past', 'transformer.layers.37.attention.kvcache_mgr.key_past', 'transformer.layers.37.attention.kvcache_mgr.value_past', 'transformer.layers.38.attention.kvcache_mgr.key_past', 'transformer.layers.38.attention.kvcache_mgr.value_past', 'transformer.layers.39.attention.kvcache_mgr.key_past', 'transformer.layers.39.attention.kvcache_mgr.value_past'], [])
 {'auto_trans_ckpt': False,
 'context': {'ascend_config': {'precision_mode': 'must_keep_origin_dtype'},
             'device_id': 0,
             'device_target': 'Ascend',
             'enable_graph_kernel': False,
             'graph_kernel_flags': '--disable_expand_ops=Softmax,Dropout '
                                   '--enable_parallel_fusion=true '
                                   '--reduce_fuse_depth=8 '
                                   '--enable_auto_tensor_inplace=true',
             'max_call_depth': 10000,
             'save_graphs': False,
             'save_graphs_path': './graph'},
 'device_num': 4,
 'infer': {'increment_model_path': '/path/qwen_7b_inc.mindir',
           'infer_seq_length': 1024,
           'prefill_model_path': '/path/qwen_7b_prefill.mindir'},
 'load_checkpoint': '/data/modelscope/Qwen-14B-Chat-ms-parallel4',
 'local_rank': 0,
 'micro_batch_interleave_num': 1,
 'model': {'arch': {'type': 'QwenForCausalLM'},
           'model_config': {'batch_size': 1,
                            'checkpoint_name_or_path': None,
                            'compute_dtype': 'float16',
                            'do_sample': False,
                            'emb_dropout_prob': 0.0,
                            'eos_token_id': 151643,
                            'hidden_size': 5120,
                            'intermediate_size': 13696,
                            'kv_channels': 128,
                            'layernorm_compute_type': 'float32',
                            'max_decode_length': 512,
                            'num_attention_heads': 40,
                            'num_hidden_layers': 40,
                            'offset': 0,
                            'pad_token_id': 151643,
                            'param_init_type': 'float16',
                            'repetition_penalty': 1,
                            'rms_norm_eps': 1e-06,
                            'rotary_dtype': 'float16',
                            'rotary_emb_base': 10000,
                            'rotary_pct': 1.0,
                            'seq_length': 8192,
                            'softmax_compute_type': 'float16',
                            'top_k': 0,
                            'top_p': 0.8,
                            'type': 'QwenConfig',
                            'use_past': True,
                            'use_past_shard': False,
                            'vocab_size': 152064}},
 'moe_config': <mindformers.modules.transformer.moe.MoEConfig object at 0xffff32a35250>,
 'only_save_strategy': False,
 'output_dir': './output',
 'parallel': {'device_num': 4,
              'enable_alltoall': False,
              'enable_parallel_optimizer': True,
              'full_batch': True,
              'gradients_mean': False,
              'parallel_mode': 'semi_auto_parallel',
              'parallel_optimizer_config': {'gradient_accumulation_shard': False,
                                            'parallel_optimizer_threshold': 64},
              'search_mode': 'sharding_propagation',
              'strategy_ckpt_save_file': './output/strategy/./ckpt_strategy_rank_0.ckpt'},
 'parallel_config': <mindformers.modules.transformer.transformer.TransformerOpParallelConfig object at 0xfffea4364250>,
 'processor': {'return_tensors': 'ms',
               'tokenizer': {'model_max_length': 8192,
                             'pad_token': '<|endoftext|>',
                             'type': 'QwenTokenizer',
                             'vocab_file': '/data/modelscope/Qwen-14B-Chat/qwen.tiktoken'},
               'type': 'QwenProcessor'},
 'rank_id': 0,
 'recompute_config': <mindformers.modules.transformer.transformer.TransformerRecomputeConfig object at 0xfffea465d8e0>,
 'resume_training': False,
 'run_mode': 'predict',
 'runner_config': {'batch_size': 1,
                   'epochs': 1,
                   'gradient_accumulation_steps': 1,
                   'sink_mode': True,
                   'sink_size': 2},
 'seed': 0,
 'src_strategy_path_or_dir': '',
 'trainer': {'model_name': 'qwen_14b', 'type': 'CausalLanguageModelingTrainer'},
 'use_parallel': True}
 2023-12-30 00:20:33,534 - mindformers[mindformers/generation/text_generator.py:1097] - INFO - Generation Config is: {'max_length': 2048, 'max_new_tokens': None, 'num_beams': 1, 'do_sample': False, 'use_past': True, 'temperature': 1.0, 'top_k': 0, 'top_p': 1.0, 'repetition_penalty': 1, 'encoder_repetition_penalty': 1.0, 'renormalize_logits': False, 'pad_token_id': 151643, 'bos_token_id': 1, 'eos_token_id': 151643, '_from_model_config': True}
 2023-12-30 00:20:33,536 - mindformers[mindformers/generation/text_generator.py:176] - INFO - The generation mode will be **GREEDY_SEARCH**.
 [WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.082 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 1 is not a tensor.
 [WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.167 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 3 is not a tensor.
 [WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.211 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 4 is not a tensor.
 [WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.239 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 5 is not a tensor.
 [WARNING] PRE_ACT(87237,ffffb3047930,python):2023-12-30-00:22:04.336.849 [mindspore/ccsrc/backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.cc:84] IncreaseAllgatherFusionId] Increase the duplicated allgather fusion id
 [WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.827.953 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 1 is not a tensor.
 [WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.091 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 3 is not a tensor.
 [WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.136 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 4 is not a tensor.
 [WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.166 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 5 is not a tensor.
 [ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.723 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:143] CheckStrategyByVector] GatherInfo19941994: The strategy is ((1, 1), (1)), strategy len: 1 is not equal to inputs len: 2, index: 1
 [ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.833 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:962] InitForCostModelWithAutoRepeatCalc] GatherInfo19941994: CheckStrategy failed.
 [ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.865 [mindspore/ccsrc/frontend/parallel/ops_info/gather_info.cc:1255] Init] GatherInfo19941994: Init failed.
 Traceback (most recent call last):
  File "/data/test1229/mindformers/research/qwen/run_qwen.py", line 165, in <module>
    main(task=args.task,
  File "/data/test1229/mindformers/research/qwen/run_qwen.py", line 113, in main
    result = task.predict(input_data=prompt,
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/_checkparam.py", line 1313, in wrapper
    return func(*args, **kwargs)
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/trainer.py", line 659, in predict
    output_result = self.trainer.predict(
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/causal_language_modeling/causal_language_modeling.py", line 315, in predict
    return self.predict_process(config=config,
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/base_trainer.py", line 878, in predict_process
    output_results = self.pipeline_task(input_data, top_k=top_k)
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/base_pipeline.py", line 123, in __call__
    outputs = self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/base_pipeline.py", line 170, in run_single
    model_outputs = self.forward(model_inputs, **forward_params)
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/text_generation_pipeline.py", line 180, in forward
    output_ids = self.network.generate(input_ids, **forward_params)
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 1114, in generate
    output_ids = self._greedy_search(
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 394, in _greedy_search
    res = self._incremental_infer(
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 238, in _incremental_infer
    res = self(
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 680, in __call__
    out = self.compile_and_run(*args, **kwargs)
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 1020, in compile_and_run
    self.compile(*args, **kwargs)
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 997, in compile
    _cell_graph_executor.compile(self, phase=self.phase,
  File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/common/api.py", line 1547, in compile
    result = self._graph_executor.compile(obj, args, kwargs, phase, self._use_vm_mode())
 RuntimeError: Failure:operator Gather init failed

 ----------------------------------------------------
 - The Function Call Stack: (For framework developers)
 ----------------------------------------------------
 In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_layer.py:171/        freqs_cos = self.reshape(self.gather(self.freqs_cos, batch_valid_length, 0), (batch_size, 1, 1, self.head_dim))/
 In file /data/test1229/mindformers/research/qwen/qwen_model.py:294/                freqs_cis = self.freqs_mgr.increment(batch_valid_length, bs)/
 In file /data/test1229/mindformers/research/qwen/qwen_model.py:290/            if self.is_first_iteration:/
 In file /data/test1229/mindformers/research/qwen/qwen_model.py:284/        if not self.use_past:/
 In file /data/test1229/mindformers/research/qwen/qwen_model.py:125/        output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)/
 In file /data/test1229/mindformers/research/qwen/qwen_model.py:123/            tokens = input_ids/
 In file /data/test1229/mindformers/research/qwen/qwen_model.py:120/        if self.training:/
 In file /data/test1229/mindformers/research/qwen/qwen_model.py:116/            if not isinstance(init_reset, Tensor):/
 In file /data/test1229/mindformers/research/qwen/qwen_model.py:111/    def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None,/

 ----------------------------------------------------
 - C++ Call Stack: (For framework developers)
 ----------------------------------------------------
 mindspore/ccsrc/frontend/parallel/step_parallel.cc:1655 ExtractStrategyAndInit

 ----------------------------------------------------
 - The Traceback of Net Construct Code:
 ----------------------------------------------------

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:111
    def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None,
    ^

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:116
            if not isinstance(init_reset, Tensor):

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:120
        if self.training:

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:123
            tokens = input_ids
            ^

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:125
        output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)
                 ^

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:125
        output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)
                 ^

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:125
        output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)
                 ^

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:284
        if not self.use_past:

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:290
            if self.is_first_iteration:

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:295
                if self.is_dynamic and self.is_flexible_shape and not self.use_kvcache_op:

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:300
                    mask = self.casual_mask.increment(self.kvcache_preprocess.range, batch_valid_length)
                    ^

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:294
                freqs_cis = self.freqs_mgr.increment(batch_valid_length, bs)

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:290
            if self.is_first_iteration:

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:125
        output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)
                 ^

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:306
        for i in range(self.num_hidden_layers):

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:307
            hidden_states = self.layers[i](hidden_states, freqs_cis, mask, kvcache_inputs=kvcache_inputs)
                            ^

 # In file /data/test1229/mindformers/research/qwen/qwen_model.py:307
            hidden_states = self.layers[i](hidden_states, freqs_cis, mask, kvcache_inputs=kvcache_inputs)
                            ^

 # In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_transformer.py:489
        h = self.attention(input_x, freqs_cis, mask, kvcache_inputs)
            ^

 # In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_transformer.py:245
        query = self.cast(self.wq(x), self.dtype)  # dp, 1 -> dp, mp
                          ^

 # In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:487
        if self.expert_flag:

 # In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:487
        if self.expert_flag:

 # In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:495
        x = self.matmul(x, weight)
            ^