File "/ptuning/main.py", line 416, in <module>
main()
File "/ptuning/main.py", line 354, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/trainer.py", line 1633, in train
return inner_training_loop(
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/trainer.py", line 1979, in _inner_training_loop
self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/trainer.py", line 2236, in _maybe_log_save_evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
File "/ptuning/trainer_seq2seq.py", line 78, in evaluate
return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/trainer.py", line 2932, in evaluate
output = eval_loop(
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/trainer.py", line 3113, in evaluation_loop
loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
File "/ptuning/trainer_seq2seq.py", line 200, in prediction_step
generated_tokens = self.model.generate(**gen_kwargs)
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/generation/utils.py", line 1490, in generate
return self.beam_search(
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/generation/utils.py", line 2836, in beam_search
sequence_outputs = beam_scorer.finalize(
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/generation/beam_search.py", line 377, in finalize
decoded[i, : sent_lengths[i]] = hypo
RuntimeError: The expanded size of the tensor (140) must match the existing size (312) at non-singleton dimension 0. Target sizes: [140]. Tensor sizes: [312]
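The failure is in `beam_scorer.finalize`: the `decoded` output buffer holds 140 tokens per row, while a finished beam hypothesis is 312 tokens long. Below is a minimal, self-contained sketch of the same shape mismatch; the two lengths are taken from the error message, everything else is illustrative:

```python
import torch

# Sketch of the failing assignment in transformers' BeamSearchScorer.finalize:
# `decoded` is preallocated with a per-row length derived from max_length
# (140 here), while a finished hypothesis `hypo` is longer (312 here).
max_length = 140
decoded = torch.zeros(1, max_length, dtype=torch.long)
hypo = torch.arange(312)       # a 312-token beam hypothesis
sent_length = hypo.shape[0]    # 312, longer than the 140-slot buffer

# PyTorch clamps the slice to 140 elements, so assigning a 312-element
# tensor raises: "The expanded size of the tensor (140) must match the
# existing size (312) at non-singleton dimension 0."
decoded[0, :sent_length] = hypo
```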
File "/home/searchgpt/anaconda3/envs/chatglm_lora/lib/python3.10/site-packages/transformers/trainer.py", line 2236, in _maybe_log_save_evaluate metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
The error occurs during evaluation: some samples evaluate normally, but certain samples raise this error when evaluated.
Run arguments:

```python
args = [
    '--model_name_or_path=../../pretrained_models/chatglm-6b',
    '--do_train',
    '--do_eval',
    '--do_predict',
    '--overwrite_cache',
    '--overwrite_output_dir',
    '--per_device_train_batch_size=16',
    '--per_device_eval_batch_size=1',
    '--gradient_accumulation_steps=1',
    '--predict_with_generate',
    '--max_steps=3000',
    '--logging_steps=50',
    '--eval_steps=80',
    '--evaluation_strategy=steps',
    '--save_steps=1000',
    '--learning_rate=1e-2',
    '--output_dir=output',
    '--pre_seq_len=4',
    '--quantization_bit=4',
    '--max_source_length=420',
    '--max_target_length=140',
    '--val_max_target_length=140',
    '--num_beams=4',
    '--max_eval_samples=100',
]
```
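One observation, offered as a guess rather than a confirmed diagnosis: the 140-slot buffer in the error matches `--val_max_target_length=140`, while beam search produced a 312-token hypothesis, which suggests the length cap passed to `generate()` is not actually bounding what the beams emit. A workaround sketch to try outside the trainer, assuming only the standard transformers 4.27 `generate()` API (`max_new_tokens` limits the newly generated tokens rather than the total length); whether this resolves the mismatch inside this repo's `trainer_seq2seq.py` is untested, and the prompt is a placeholder:

```python
from transformers import AutoModel, AutoTokenizer

# Model path and length/beam values come from the run arguments above;
# trust_remote_code=True is required to load ChatGLM-6B's custom code.
model_path = "../../pretrained_models/chatglm-6b"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()

inputs = tokenizer("example input", return_tensors="pt").to(model.device)
generated = model.generate(
    **inputs,
    max_new_tokens=140,  # bound only the generated tokens (--val_max_target_length)
    num_beams=4,         # --num_beams from the run arguments
)
print(tokenizer.decode(generated[0], skip_special_tokens=True))
```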
Environment:
- tensorboard 2.12.0
- tensorboard-data-server 0.7.0
- tensorboard-plugin-wit 1.8.1
- termcolor 2.2.0
- threadpoolctl 3.1.0
- tokenizers 0.12.1
- torch 2.0.0
- torchvision 0.15.1
- tqdm 4.65.0
- transformers 4.27.4
- triton 2.0.0