diff --git a/docs/source/de/training.md b/docs/source/de/training.md index 7b1bd3e5d0c368..806a380b6cebc9 100644 --- a/docs/source/de/training.md +++ b/docs/source/de/training.md @@ -128,12 +128,12 @@ Rufen Sie [`~evaluate.compute`] auf `metric` auf, um die Genauigkeit Ihrer Vorhe ... return metric.compute(predictions=predictions, references=labels) ``` -Wenn Sie Ihre Bewertungsmetriken während der Feinabstimmung überwachen möchten, geben Sie den Parameter `evaluation_strategy` in Ihren Trainingsargumenten an, um die Bewertungsmetrik am Ende jeder Epoche zu ermitteln: +Wenn Sie Ihre Bewertungsmetriken während der Feinabstimmung überwachen möchten, geben Sie den Parameter `eval_strategy` in Ihren Trainingsargumenten an, um die Bewertungsmetrik am Ende jeder Epoche zu ermitteln: ```py >>> from transformers import TrainingArguments, Trainer ->>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch") +>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") ``` ### Trainer diff --git a/docs/source/en/model_memory_anatomy.md b/docs/source/en/model_memory_anatomy.md index c820681a7af0fc..1fc7b495932aff 100644 --- a/docs/source/en/model_memory_anatomy.md +++ b/docs/source/en/model_memory_anatomy.md @@ -145,7 +145,7 @@ arguments: ```py default_args = { "output_dir": "tmp", - "evaluation_strategy": "steps", + "eval_strategy": "steps", "num_train_epochs": 1, "log_level": "error", "report_to": "none", diff --git a/docs/source/en/tasks/asr.md b/docs/source/en/tasks/asr.md index 737460ed297bcf..a1a96271102ba4 100644 --- a/docs/source/en/tasks/asr.md +++ b/docs/source/en/tasks/asr.md @@ -270,7 +270,7 @@ At this point, only three steps remain: ... gradient_checkpointing=True, ... fp16=True, ... group_by_length=True, -... evaluation_strategy="steps", +... eval_strategy="steps", ... per_device_eval_batch_size=8, ... save_steps=1000, ... eval_steps=1000, diff --git a/docs/source/en/tasks/audio_classification.md b/docs/source/en/tasks/audio_classification.md index 678af90c4fa079..5ea3567f4c3c6c 100644 --- a/docs/source/en/tasks/audio_classification.md +++ b/docs/source/en/tasks/audio_classification.md @@ -221,7 +221,7 @@ At this point, only three steps remain: ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_mind_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=3e-5, ... per_device_train_batch_size=32, diff --git a/docs/source/en/tasks/document_question_answering.md b/docs/source/en/tasks/document_question_answering.md index 24bf3a069ac9a5..3d3acf0541dbf9 100644 --- a/docs/source/en/tasks/document_question_answering.md +++ b/docs/source/en/tasks/document_question_answering.md @@ -399,7 +399,7 @@ In this case the `output_dir` will also be the name of the repo where your model ... num_train_epochs=20, ... save_steps=200, ... logging_steps=50, -... evaluation_strategy="steps", +... eval_strategy="steps", ... learning_rate=5e-5, ... save_total_limit=2, ... remove_unused_columns=False, diff --git a/docs/source/en/tasks/image_captioning.md b/docs/source/en/tasks/image_captioning.md index b426cbf6383187..633ccc491ebb35 100644 --- a/docs/source/en/tasks/image_captioning.md +++ b/docs/source/en/tasks/image_captioning.md @@ -196,7 +196,7 @@ training_args = TrainingArguments( per_device_eval_batch_size=32, gradient_accumulation_steps=2, save_total_limit=3, - evaluation_strategy="steps", + eval_strategy="steps", eval_steps=50, save_strategy="steps", save_steps=50, diff --git a/docs/source/en/tasks/image_classification.md b/docs/source/en/tasks/image_classification.md index 30c517f3be6499..25f232bc00a728 100644 --- a/docs/source/en/tasks/image_classification.md +++ b/docs/source/en/tasks/image_classification.md @@ -302,7 +302,7 @@ At this point, only three steps remain: >>> training_args = TrainingArguments( ... output_dir="my_awesome_food_model", ... remove_unused_columns=False, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=5e-5, ... per_device_train_batch_size=16, diff --git a/docs/source/en/tasks/knowledge_distillation_for_image_classification.md b/docs/source/en/tasks/knowledge_distillation_for_image_classification.md index 8448e53011494c..f856e35b1740bd 100644 --- a/docs/source/en/tasks/knowledge_distillation_for_image_classification.md +++ b/docs/source/en/tasks/knowledge_distillation_for_image_classification.md @@ -112,7 +112,7 @@ training_args = TrainingArguments( fp16=True, logging_dir=f"{repo_name}/logs", logging_strategy="epoch", - evaluation_strategy="epoch", + eval_strategy="epoch", save_strategy="epoch", load_best_model_at_end=True, metric_for_best_model="accuracy", diff --git a/docs/source/en/tasks/language_modeling.md b/docs/source/en/tasks/language_modeling.md index 88a45192713912..380c089d059f34 100644 --- a/docs/source/en/tasks/language_modeling.md +++ b/docs/source/en/tasks/language_modeling.md @@ -249,7 +249,7 @@ At this point, only three steps remain: ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_eli5_clm-model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... weight_decay=0.01, ... push_to_hub=True, diff --git a/docs/source/en/tasks/masked_language_modeling.md b/docs/source/en/tasks/masked_language_modeling.md index de91cd587a6a0c..1736e858eeb36e 100644 --- a/docs/source/en/tasks/masked_language_modeling.md +++ b/docs/source/en/tasks/masked_language_modeling.md @@ -238,7 +238,7 @@ At this point, only three steps remain: ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_eli5_mlm_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... num_train_epochs=3, ... weight_decay=0.01, diff --git a/docs/source/en/tasks/multiple_choice.md b/docs/source/en/tasks/multiple_choice.md index 5cf17448f0a66a..9baa0eea5d5934 100644 --- a/docs/source/en/tasks/multiple_choice.md +++ b/docs/source/en/tasks/multiple_choice.md @@ -265,7 +265,7 @@ At this point, only three steps remain: ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_swag_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... load_best_model_at_end=True, ... learning_rate=5e-5, diff --git a/docs/source/en/tasks/question_answering.md b/docs/source/en/tasks/question_answering.md index 2c4706ad93b001..724e51d0dc9f5d 100644 --- a/docs/source/en/tasks/question_answering.md +++ b/docs/source/en/tasks/question_answering.md @@ -218,7 +218,7 @@ At this point, only three steps remain: ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_qa_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/en/tasks/semantic_segmentation.md b/docs/source/en/tasks/semantic_segmentation.md index 675f9222cafd21..048a1d38d003b6 100644 --- a/docs/source/en/tasks/semantic_segmentation.md +++ b/docs/source/en/tasks/semantic_segmentation.md @@ -535,7 +535,7 @@ At this point, only three steps remain: ... per_device_train_batch_size=2, ... per_device_eval_batch_size=2, ... save_total_limit=3, -... evaluation_strategy="steps", +... eval_strategy="steps", ... save_strategy="steps", ... save_steps=20, ... eval_steps=20, diff --git a/docs/source/en/tasks/sequence_classification.md b/docs/source/en/tasks/sequence_classification.md index 572bc6a8b82418..67a792b43edac7 100644 --- a/docs/source/en/tasks/sequence_classification.md +++ b/docs/source/en/tasks/sequence_classification.md @@ -187,7 +187,7 @@ At this point, only three steps remain: ... per_device_eval_batch_size=16, ... num_train_epochs=2, ... weight_decay=0.01, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... load_best_model_at_end=True, ... push_to_hub=True, diff --git a/docs/source/en/tasks/summarization.md b/docs/source/en/tasks/summarization.md index 28dd3f5a49ebe3..37a305a4ac008e 100644 --- a/docs/source/en/tasks/summarization.md +++ b/docs/source/en/tasks/summarization.md @@ -202,7 +202,7 @@ At this point, only three steps remain: ```py >>> training_args = Seq2SeqTrainingArguments( ... output_dir="my_awesome_billsum_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/en/tasks/text-to-speech.md b/docs/source/en/tasks/text-to-speech.md index 0b324904e9e263..494e20009529ce 100644 --- a/docs/source/en/tasks/text-to-speech.md +++ b/docs/source/en/tasks/text-to-speech.md @@ -477,7 +477,7 @@ only look at the loss: ... max_steps=4000, ... gradient_checkpointing=True, ... fp16=True, -... evaluation_strategy="steps", +... eval_strategy="steps", ... per_device_eval_batch_size=2, ... save_steps=1000, ... eval_steps=1000, diff --git a/docs/source/en/tasks/token_classification.md b/docs/source/en/tasks/token_classification.md index 791737b677c871..d0e4e87963f9b1 100644 --- a/docs/source/en/tasks/token_classification.md +++ b/docs/source/en/tasks/token_classification.md @@ -290,7 +290,7 @@ At this point, only three steps remain: ... per_device_eval_batch_size=16, ... num_train_epochs=2, ... weight_decay=0.01, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... load_best_model_at_end=True, ... push_to_hub=True, diff --git a/docs/source/en/tasks/translation.md b/docs/source/en/tasks/translation.md index f0433a0dad797d..172b06e546f8a7 100644 --- a/docs/source/en/tasks/translation.md +++ b/docs/source/en/tasks/translation.md @@ -209,7 +209,7 @@ At this point, only three steps remain: ```py >>> training_args = Seq2SeqTrainingArguments( ... output_dir="my_awesome_opus_books_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/en/tasks/video_classification.md b/docs/source/en/tasks/video_classification.md index 38bdceba41b7b4..1a0b8deeb1d34a 100644 --- a/docs/source/en/tasks/video_classification.md +++ b/docs/source/en/tasks/video_classification.md @@ -354,7 +354,7 @@ Most of the training arguments are self-explanatory, but one that is quite impor >>> args = TrainingArguments( ... new_model_name, ... remove_unused_columns=False, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=5e-5, ... per_device_train_batch_size=batch_size, diff --git a/docs/source/en/trainer.md b/docs/source/en/trainer.md index 3d57220fe827d9..b69bebd6ea2004 100644 --- a/docs/source/en/trainer.md +++ b/docs/source/en/trainer.md @@ -62,7 +62,7 @@ training_args = TrainingArguments( per_device_eval_batch_size=16, num_train_epochs=2, weight_decay=0.01, - evaluation_strategy="epoch", + eval_strategy="epoch", save_strategy="epoch", load_best_model_at_end=True, push_to_hub=True, diff --git a/docs/source/en/training.md b/docs/source/en/training.md index 4bd72aa9f6384d..cea583c05ebc7d 100644 --- a/docs/source/en/training.md +++ b/docs/source/en/training.md @@ -128,12 +128,12 @@ Call [`~evaluate.compute`] on `metric` to calculate the accuracy of your predict ... return metric.compute(predictions=predictions, references=labels) ``` -If you'd like to monitor your evaluation metrics during fine-tuning, specify the `evaluation_strategy` parameter in your training arguments to report the evaluation metric at the end of each epoch: +If you'd like to monitor your evaluation metrics during fine-tuning, specify the `eval_strategy` parameter in your training arguments to report the evaluation metric at the end of each epoch: ```py >>> from transformers import TrainingArguments, Trainer ->>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch") +>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") ``` ### Trainer diff --git a/docs/source/es/tasks/asr.md b/docs/source/es/tasks/asr.md index 850bdfd711e7e0..7d3133af472f64 100644 --- a/docs/source/es/tasks/asr.md +++ b/docs/source/es/tasks/asr.md @@ -260,7 +260,7 @@ En este punto, solo quedan tres pasos: ... gradient_checkpointing=True, ... fp16=True, ... group_by_length=True, -... evaluation_strategy="steps", +... eval_strategy="steps", ... per_device_eval_batch_size=8, ... save_steps=1000, ... eval_steps=1000, diff --git a/docs/source/es/tasks/image_captioning.md b/docs/source/es/tasks/image_captioning.md index f06f6eda0a7576..620dcec1bfbd1c 100644 --- a/docs/source/es/tasks/image_captioning.md +++ b/docs/source/es/tasks/image_captioning.md @@ -188,7 +188,7 @@ training_args = TrainingArguments( per_device_eval_batch_size=32, gradient_accumulation_steps=2, save_total_limit=3, - evaluation_strategy="steps", + eval_strategy="steps", eval_steps=50, save_strategy="steps", save_steps=50, diff --git a/docs/source/es/tasks/image_classification.md b/docs/source/es/tasks/image_classification.md index f09730caf69fee..4e3696c505b030 100644 --- a/docs/source/es/tasks/image_classification.md +++ b/docs/source/es/tasks/image_classification.md @@ -143,7 +143,7 @@ Al llegar a este punto, solo quedan tres pasos: >>> training_args = TrainingArguments( ... output_dir="./results", ... per_device_train_batch_size=16, -... evaluation_strategy="steps", +... eval_strategy="steps", ... num_train_epochs=4, ... fp16=True, ... save_steps=100, diff --git a/docs/source/es/tasks/language_modeling.md b/docs/source/es/tasks/language_modeling.md index 010d1bccae7bbf..73bfc4d650f131 100644 --- a/docs/source/es/tasks/language_modeling.md +++ b/docs/source/es/tasks/language_modeling.md @@ -232,7 +232,7 @@ A este punto, solo faltan tres pasos: ```py >>> training_args = TrainingArguments( ... output_dir="./results", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... weight_decay=0.01, ... ) @@ -338,7 +338,7 @@ A este punto, solo faltan tres pasos: ```py >>> training_args = TrainingArguments( ... output_dir="./results", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... num_train_epochs=3, ... weight_decay=0.01, diff --git a/docs/source/es/tasks/multiple_choice.md b/docs/source/es/tasks/multiple_choice.md index ca2e3d15f63546..959416f149c357 100644 --- a/docs/source/es/tasks/multiple_choice.md +++ b/docs/source/es/tasks/multiple_choice.md @@ -212,7 +212,7 @@ En este punto, solo quedan tres pasos: ```py >>> training_args = TrainingArguments( ... output_dir="./results", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=5e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/es/tasks/question_answering.md b/docs/source/es/tasks/question_answering.md index 5cd59f6b064f71..ca43aac9ae9e7a 100644 --- a/docs/source/es/tasks/question_answering.md +++ b/docs/source/es/tasks/question_answering.md @@ -182,7 +182,7 @@ En este punto, solo quedan tres pasos: ```py >>> training_args = TrainingArguments( ... output_dir="./results", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/es/tasks/summarization.md b/docs/source/es/tasks/summarization.md index 19ceb90b22cbb2..e6a9532f660387 100644 --- a/docs/source/es/tasks/summarization.md +++ b/docs/source/es/tasks/summarization.md @@ -140,7 +140,7 @@ En este punto, solo faltan tres pasos: ```py >>> training_args = Seq2SeqTrainingArguments( ... output_dir="./results", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/es/trainer.md b/docs/source/es/trainer.md index 9a36e3867c17e3..57fcaa62900572 100644 --- a/docs/source/es/trainer.md +++ b/docs/source/es/trainer.md @@ -60,7 +60,7 @@ training_args = TrainingArguments( per_device_eval_batch_size=16, num_train_epochs=2, weight_decay=0.01, - evaluation_strategy="epoch", + eval_strategy="epoch", save_strategy="epoch", load_best_model_at_end=True, push_to_hub=True, diff --git a/docs/source/es/training.md b/docs/source/es/training.md index fef44ed3f9ff72..f10f49d08997ac 100644 --- a/docs/source/es/training.md +++ b/docs/source/es/training.md @@ -120,12 +120,12 @@ Define la función `compute` en `metric` para calcular el accuracy de tus predic ... return metric.compute(predictions=predictions, references=labels) ``` -Si quieres controlar tus métricas de evaluación durante el fine-tuning, especifica el parámetro `evaluation_strategy` en tus argumentos de entrenamiento para que el modelo tenga en cuenta la métrica de evaluación al final de cada época: +Si quieres controlar tus métricas de evaluación durante el fine-tuning, especifica el parámetro `eval_strategy` en tus argumentos de entrenamiento para que el modelo tenga en cuenta la métrica de evaluación al final de cada época: ```py >>> from transformers import TrainingArguments ->>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch") +>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") ``` ### Trainer diff --git a/docs/source/it/migration.md b/docs/source/it/migration.md index 9a5f4d005505e8..07d31705784e7f 100644 --- a/docs/source/it/migration.md +++ b/docs/source/it/migration.md @@ -167,7 +167,7 @@ Per quanto riguarda la classe `Trainer`: - Il metodo `is_world_master` di `Trainer` è deprecato a favore di `is_world_process_zero`. Per quanto riguarda la classe `TrainingArguments`: -- L'argomento `evaluate_during_training` di `TrainingArguments` è deprecato a favore di `evaluation_strategy`. +- L'argomento `evaluate_during_training` di `TrainingArguments` è deprecato a favore di `eval_strategy`. Per quanto riguarda il modello Transfo-XL: - L'attributo di configurazione `tie_weight` di Transfo-XL diventa `tie_words_embeddings`. diff --git a/docs/source/it/training.md b/docs/source/it/training.md index 2a64cfca375f69..21008a92bf7c6f 100644 --- a/docs/source/it/training.md +++ b/docs/source/it/training.md @@ -121,12 +121,12 @@ Richiama `compute` su `metric` per calcolare l'accuratezza delle tue previsioni. ... return metric.compute(predictions=predictions, references=labels) ``` -Se preferisci monitorare le tue metriche di valutazione durante il fine-tuning, specifica il parametro `evaluation_strategy` nei tuoi training arguments per restituire le metriche di valutazione ad ogni epoca di addestramento: +Se preferisci monitorare le tue metriche di valutazione durante il fine-tuning, specifica il parametro `eval_strategy` nei tuoi training arguments per restituire le metriche di valutazione ad ogni epoca di addestramento: ```py >>> from transformers import TrainingArguments, Trainer ->>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch") +>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") ``` ### Trainer diff --git a/docs/source/ja/model_memory_anatomy.md b/docs/source/ja/model_memory_anatomy.md index 5f09489b7f79aa..45a383d616ad34 100644 --- a/docs/source/ja/model_memory_anatomy.md +++ b/docs/source/ja/model_memory_anatomy.md @@ -136,7 +136,7 @@ Tue Jan 11 08:58:05 2022 ```py default_args = { "output_dir": "tmp", - "evaluation_strategy": "steps", + "eval_strategy": "steps", "num_train_epochs": 1, "log_level": "error", "report_to": "none", diff --git a/docs/source/ja/tasks/asr.md b/docs/source/ja/tasks/asr.md index fd564abdc5c908..6d5f65461d215b 100644 --- a/docs/source/ja/tasks/asr.md +++ b/docs/source/ja/tasks/asr.md @@ -270,7 +270,7 @@ MInDS-14 データセットのサンプリング レートは 8000kHz です ( ... gradient_checkpointing=True, ... fp16=True, ... group_by_length=True, -... evaluation_strategy="steps", +... eval_strategy="steps", ... per_device_eval_batch_size=8, ... save_steps=1000, ... eval_steps=1000, diff --git a/docs/source/ja/tasks/audio_classification.md b/docs/source/ja/tasks/audio_classification.md index 58d42f3f4d4ff1..6f4d0dd171846a 100644 --- a/docs/source/ja/tasks/audio_classification.md +++ b/docs/source/ja/tasks/audio_classification.md @@ -221,7 +221,7 @@ MInDS-14 データセットのサンプリング レートは 8000khz です ( ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_mind_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=3e-5, ... per_device_train_batch_size=32, diff --git a/docs/source/ja/tasks/document_question_answering.md b/docs/source/ja/tasks/document_question_answering.md index 478c6af2235490..ec88f262086cf5 100644 --- a/docs/source/ja/tasks/document_question_answering.md +++ b/docs/source/ja/tasks/document_question_answering.md @@ -403,7 +403,7 @@ end_index 18 ... num_train_epochs=20, ... save_steps=200, ... logging_steps=50, -... evaluation_strategy="steps", +... eval_strategy="steps", ... learning_rate=5e-5, ... save_total_limit=2, ... remove_unused_columns=False, diff --git a/docs/source/ja/tasks/image_captioning.md b/docs/source/ja/tasks/image_captioning.md index 31c687c111c071..7649947b2c6450 100644 --- a/docs/source/ja/tasks/image_captioning.md +++ b/docs/source/ja/tasks/image_captioning.md @@ -194,7 +194,7 @@ training_args = TrainingArguments( per_device_eval_batch_size=32, gradient_accumulation_steps=2, save_total_limit=3, - evaluation_strategy="steps", + eval_strategy="steps", eval_steps=50, save_strategy="steps", save_steps=50, diff --git a/docs/source/ja/tasks/image_classification.md b/docs/source/ja/tasks/image_classification.md index f8d8d0d55238b9..f16e46c26fc316 100644 --- a/docs/source/ja/tasks/image_classification.md +++ b/docs/source/ja/tasks/image_classification.md @@ -308,7 +308,7 @@ food["test"].set_transform(preprocess_val) >>> training_args = TrainingArguments( ... output_dir="my_awesome_food_model", ... remove_unused_columns=False, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=5e-5, ... per_device_train_batch_size=16, diff --git a/docs/source/ja/tasks/knowledge_distillation_for_image_classification.md b/docs/source/ja/tasks/knowledge_distillation_for_image_classification.md index 16df6e3b9d9658..30c0dbbf063040 100644 --- a/docs/source/ja/tasks/knowledge_distillation_for_image_classification.md +++ b/docs/source/ja/tasks/knowledge_distillation_for_image_classification.md @@ -112,7 +112,7 @@ training_args = TrainingArguments( fp16=True, logging_dir=f"{repo_name}/logs", logging_strategy="epoch", - evaluation_strategy="epoch", + eval_strategy="epoch", save_strategy="epoch", load_best_model_at_end=True, metric_for_best_model="accuracy", diff --git a/docs/source/ja/tasks/language_modeling.md b/docs/source/ja/tasks/language_modeling.md index 835a0d54ea4ffd..1d1bcab0b3757a 100644 --- a/docs/source/ja/tasks/language_modeling.md +++ b/docs/source/ja/tasks/language_modeling.md @@ -246,7 +246,7 @@ Apply the `group_texts` function over the entire dataset: ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_eli5_clm-model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... weight_decay=0.01, ... push_to_hub=True, diff --git a/docs/source/ja/tasks/masked_language_modeling.md b/docs/source/ja/tasks/masked_language_modeling.md index b0fff72f9b0e26..29488d5c71e44e 100644 --- a/docs/source/ja/tasks/masked_language_modeling.md +++ b/docs/source/ja/tasks/masked_language_modeling.md @@ -231,7 +231,7 @@ pip install transformers datasets evaluate ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_eli5_mlm_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... num_train_epochs=3, ... weight_decay=0.01, diff --git a/docs/source/ja/tasks/multiple_choice.md b/docs/source/ja/tasks/multiple_choice.md index bfe5f388cb4ab6..045c9112932dba 100644 --- a/docs/source/ja/tasks/multiple_choice.md +++ b/docs/source/ja/tasks/multiple_choice.md @@ -266,7 +266,7 @@ tokenized_swag = swag.map(preprocess_function, batched=True) ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_swag_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... load_best_model_at_end=True, ... learning_rate=5e-5, diff --git a/docs/source/ja/tasks/question_answering.md b/docs/source/ja/tasks/question_answering.md index 54df687c2f047f..d7feac56076ffa 100644 --- a/docs/source/ja/tasks/question_answering.md +++ b/docs/source/ja/tasks/question_answering.md @@ -220,7 +220,7 @@ pip install transformers datasets evaluate ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_qa_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/ja/tasks/semantic_segmentation.md b/docs/source/ja/tasks/semantic_segmentation.md index 2816688b4e1c14..572280c1962ede 100644 --- a/docs/source/ja/tasks/semantic_segmentation.md +++ b/docs/source/ja/tasks/semantic_segmentation.md @@ -323,7 +323,7 @@ pip install -q datasets transformers evaluate ... per_device_train_batch_size=2, ... per_device_eval_batch_size=2, ... save_total_limit=3, -... evaluation_strategy="steps", +... eval_strategy="steps", ... save_strategy="steps", ... save_steps=20, ... eval_steps=20, diff --git a/docs/source/ja/tasks/sequence_classification.md b/docs/source/ja/tasks/sequence_classification.md index 767d5e03cdf607..c97644ca10fad6 100644 --- a/docs/source/ja/tasks/sequence_classification.md +++ b/docs/source/ja/tasks/sequence_classification.md @@ -324,7 +324,7 @@ pip install -q datasets transformers evaluate ... per_device_train_batch_size=2, ... per_device_eval_batch_size=2, ... save_total_limit=3, -... evaluation_strategy="steps", +... eval_strategy="steps", ... save_strategy="steps", ... save_steps=20, ... eval_steps=20, diff --git a/docs/source/ja/tasks/summarization.md b/docs/source/ja/tasks/summarization.md index a4b012d712f2e7..04f1a53d13f2c6 100644 --- a/docs/source/ja/tasks/summarization.md +++ b/docs/source/ja/tasks/summarization.md @@ -204,7 +204,7 @@ pip install transformers datasets evaluate rouge_score ```py >>> training_args = Seq2SeqTrainingArguments( ... output_dir="my_awesome_billsum_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/ja/tasks/text-to-speech.md b/docs/source/ja/tasks/text-to-speech.md index 357ec18855149e..b302a19a0d5818 100644 --- a/docs/source/ja/tasks/text-to-speech.md +++ b/docs/source/ja/tasks/text-to-speech.md @@ -477,7 +477,7 @@ SpeechT5 では、モデルのデコーダ部分への入力が 2 分の 1 に ... max_steps=4000, ... gradient_checkpointing=True, ... fp16=True, -... evaluation_strategy="steps", +... eval_strategy="steps", ... per_device_eval_batch_size=2, ... save_steps=1000, ... eval_steps=1000, diff --git a/docs/source/ja/tasks/token_classification.md b/docs/source/ja/tasks/token_classification.md index 2b650c4a844d84..497584674252ad 100644 --- a/docs/source/ja/tasks/token_classification.md +++ b/docs/source/ja/tasks/token_classification.md @@ -288,7 +288,7 @@ pip install transformers datasets evaluate seqeval ... per_device_eval_batch_size=16, ... num_train_epochs=2, ... weight_decay=0.01, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... load_best_model_at_end=True, ... push_to_hub=True, diff --git a/docs/source/ja/tasks/translation.md b/docs/source/ja/tasks/translation.md index fb2c89f3856d49..ed5bd24777e14d 100644 --- a/docs/source/ja/tasks/translation.md +++ b/docs/source/ja/tasks/translation.md @@ -208,7 +208,7 @@ pip install transformers datasets evaluate sacrebleu ```py >>> training_args = Seq2SeqTrainingArguments( ... output_dir="my_awesome_opus_books_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/ja/tasks/video_classification.md b/docs/source/ja/tasks/video_classification.md index e0c383619411bf..688cb701496f79 100644 --- a/docs/source/ja/tasks/video_classification.md +++ b/docs/source/ja/tasks/video_classification.md @@ -360,7 +360,7 @@ You should probably TRAIN this model on a down-stream task to be able to use it >>> args = TrainingArguments( ... new_model_name, ... remove_unused_columns=False, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=5e-5, ... per_device_train_batch_size=batch_size, diff --git a/docs/source/ja/training.md b/docs/source/ja/training.md index 79fbb1b7fb2571..9dd2369601c10a 100644 --- a/docs/source/ja/training.md +++ b/docs/source/ja/training.md @@ -135,12 +135,12 @@ BERTモデルの事前学習済みのヘッドは破棄され、ランダムに ... return metric.compute(predictions=predictions, references=labels) ``` -評価メトリクスをファインチューニング中に監視したい場合、トレーニング引数で `evaluation_strategy` パラメータを指定して、各エポックの終了時に評価メトリクスを報告します: +評価メトリクスをファインチューニング中に監視したい場合、トレーニング引数で `eval_strategy` パラメータを指定して、各エポックの終了時に評価メトリクスを報告します: ```python >>> from transformers import TrainingArguments, Trainer ->>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch") +>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") ``` ### Trainer diff --git a/docs/source/ko/model_memory_anatomy.md b/docs/source/ko/model_memory_anatomy.md index 5701e19aaa085d..a5c3a0f35292af 100644 --- a/docs/source/ko/model_memory_anatomy.md +++ b/docs/source/ko/model_memory_anatomy.md @@ -132,7 +132,7 @@ Tue Jan 11 08:58:05 2022 ```py default_args = { "output_dir": "tmp", - "evaluation_strategy": "steps", + "eval_strategy": "steps", "num_train_epochs": 1, "log_level": "error", "report_to": "none", diff --git a/docs/source/ko/tasks/asr.md b/docs/source/ko/tasks/asr.md index 47a568ecf02bb4..474d60bf2d1a19 100644 --- a/docs/source/ko/tasks/asr.md +++ b/docs/source/ko/tasks/asr.md @@ -274,7 +274,7 @@ MInDS-14 데이터 세트의 샘플링 레이트는 8000kHz이므로([데이터 ... gradient_checkpointing=True, ... fp16=True, ... group_by_length=True, -... evaluation_strategy="steps", +... eval_strategy="steps", ... per_device_eval_batch_size=8, ... save_steps=1000, ... eval_steps=1000, diff --git a/docs/source/ko/tasks/audio_classification.md b/docs/source/ko/tasks/audio_classification.md index 7e1094815fd429..c9ef810e8ef4f4 100644 --- a/docs/source/ko/tasks/audio_classification.md +++ b/docs/source/ko/tasks/audio_classification.md @@ -221,7 +221,7 @@ MinDS-14 데이터 세트의 샘플링 속도는 8000khz이므로(이 정보는 ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_mind_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=3e-5, ... per_device_train_batch_size=32, diff --git a/docs/source/ko/tasks/document_question_answering.md b/docs/source/ko/tasks/document_question_answering.md index b9e98f3bf67235..920eb99ea52960 100644 --- a/docs/source/ko/tasks/document_question_answering.md +++ b/docs/source/ko/tasks/document_question_answering.md @@ -385,7 +385,7 @@ end_index 18 ... num_train_epochs=20, ... save_steps=200, ... logging_steps=50, -... evaluation_strategy="steps", +... eval_strategy="steps", ... learning_rate=5e-5, ... save_total_limit=2, ... remove_unused_columns=False, diff --git a/docs/source/ko/tasks/image_captioning.md b/docs/source/ko/tasks/image_captioning.md index c5139649a9185b..c4d0f99b6170ee 100644 --- a/docs/source/ko/tasks/image_captioning.md +++ b/docs/source/ko/tasks/image_captioning.md @@ -201,7 +201,7 @@ training_args = TrainingArguments( per_device_eval_batch_size=32, gradient_accumulation_steps=2, save_total_limit=3, - evaluation_strategy="steps", + eval_strategy="steps", eval_steps=50, save_strategy="steps", save_steps=50, diff --git a/docs/source/ko/tasks/image_classification.md b/docs/source/ko/tasks/image_classification.md index 031e01ea5c5a83..d647b4512b038a 100644 --- a/docs/source/ko/tasks/image_classification.md +++ b/docs/source/ko/tasks/image_classification.md @@ -301,7 +301,7 @@ food["test"].set_transform(preprocess_val) >>> training_args = TrainingArguments( ... output_dir="my_awesome_food_model", ... remove_unused_columns=False, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=5e-5, ... per_device_train_batch_size=16, diff --git a/docs/source/ko/tasks/language_modeling.md b/docs/source/ko/tasks/language_modeling.md index ee1d11c1d09daf..b98c64dcc3adae 100644 --- a/docs/source/ko/tasks/language_modeling.md +++ b/docs/source/ko/tasks/language_modeling.md @@ -233,7 +233,7 @@ pip install transformers datasets evaluate ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_eli5_clm-model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... weight_decay=0.01, ... push_to_hub=True, diff --git a/docs/source/ko/tasks/masked_language_modeling.md b/docs/source/ko/tasks/masked_language_modeling.md index 3aafdf1cb9eebe..c710dbf168ed01 100644 --- a/docs/source/ko/tasks/masked_language_modeling.md +++ b/docs/source/ko/tasks/masked_language_modeling.md @@ -236,7 +236,7 @@ Hugging Face 계정에 로그인하여 모델을 업로드하고 커뮤니티와 ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_eli5_mlm_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... num_train_epochs=3, ... weight_decay=0.01, diff --git a/docs/source/ko/tasks/multiple_choice.md b/docs/source/ko/tasks/multiple_choice.md index 4e02f7fabe504f..b28654ea4f1438 100644 --- a/docs/source/ko/tasks/multiple_choice.md +++ b/docs/source/ko/tasks/multiple_choice.md @@ -265,7 +265,7 @@ tokenized_swag = swag.map(preprocess_function, batched=True) ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_swag_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... load_best_model_at_end=True, ... learning_rate=5e-5, diff --git a/docs/source/ko/tasks/question_answering.md b/docs/source/ko/tasks/question_answering.md index 9539b9a403030e..7fe8ba3a5f08d0 100644 --- a/docs/source/ko/tasks/question_answering.md +++ b/docs/source/ko/tasks/question_answering.md @@ -215,7 +215,7 @@ pip install transformers datasets evaluate ```py >>> training_args = TrainingArguments( ... output_dir="my_awesome_qa_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/ko/tasks/semantic_segmentation.md b/docs/source/ko/tasks/semantic_segmentation.md index 4b6109d692bf10..0afa4bbe020f7c 100644 --- a/docs/source/ko/tasks/semantic_segmentation.md +++ b/docs/source/ko/tasks/semantic_segmentation.md @@ -317,7 +317,7 @@ pip install -q datasets transformers evaluate ... per_device_train_batch_size=2, ... per_device_eval_batch_size=2, ... save_total_limit=3, -... evaluation_strategy="steps", +... eval_strategy="steps", ... save_strategy="steps", ... save_steps=20, ... eval_steps=20, diff --git a/docs/source/ko/tasks/sequence_classification.md b/docs/source/ko/tasks/sequence_classification.md index a1a5da50e9f614..9cf6b9f52433a3 100644 --- a/docs/source/ko/tasks/sequence_classification.md +++ b/docs/source/ko/tasks/sequence_classification.md @@ -185,7 +185,7 @@ tokenized_imdb = imdb.map(preprocess_function, batched=True) ... per_device_eval_batch_size=16, ... num_train_epochs=2, ... weight_decay=0.01, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... load_best_model_at_end=True, ... push_to_hub=True, diff --git a/docs/source/ko/tasks/summarization.md b/docs/source/ko/tasks/summarization.md index 43eae25d79f0aa..62e410757e464e 100644 --- a/docs/source/ko/tasks/summarization.md +++ b/docs/source/ko/tasks/summarization.md @@ -211,7 +211,7 @@ Hugging Face 계정에 로그인하면 모델을 업로드하고 커뮤니티에 ```py >>> training_args = Seq2SeqTrainingArguments( ... output_dir="my_awesome_billsum_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/ko/tasks/token_classification.md b/docs/source/ko/tasks/token_classification.md index 1e49d79a0d7235..5bb3989d45944f 100644 --- a/docs/source/ko/tasks/token_classification.md +++ b/docs/source/ko/tasks/token_classification.md @@ -288,7 +288,7 @@ Hugging Face 계정에 로그인하여 모델을 업로드하고 커뮤니티에 ... per_device_eval_batch_size=16, ... num_train_epochs=2, ... weight_decay=0.01, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... load_best_model_at_end=True, ... push_to_hub=True, diff --git a/docs/source/ko/tasks/translation.md b/docs/source/ko/tasks/translation.md index 6de275f7d04c80..60ca77ee1e41bb 100644 --- a/docs/source/ko/tasks/translation.md +++ b/docs/source/ko/tasks/translation.md @@ -209,7 +209,7 @@ pip install transformers datasets evaluate sacrebleu ```py >>> training_args = Seq2SeqTrainingArguments( ... output_dir="my_awesome_opus_books_model", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/ko/tasks/video_classification.md b/docs/source/ko/tasks/video_classification.md index 01dbb0757b6608..762716c9ff7f8e 100644 --- a/docs/source/ko/tasks/video_classification.md +++ b/docs/source/ko/tasks/video_classification.md @@ -358,7 +358,7 @@ You should probably TRAIN this model on a down-stream task to be able to use it >>> args = TrainingArguments( ... new_model_name, ... remove_unused_columns=False, -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... save_strategy="epoch", ... learning_rate=5e-5, ... per_device_train_batch_size=batch_size, diff --git a/docs/source/ko/training.md b/docs/source/ko/training.md index fa6d56bdc36696..432ba186c3df0c 100644 --- a/docs/source/ko/training.md +++ b/docs/source/ko/training.md @@ -129,12 +129,12 @@ rendered properly in your Markdown viewer. ... return metric.compute(predictions=predictions, references=labels) ``` -미세 튜닝 중에 평가 지표를 모니터링하려면 훈련 인수에 `evaluation_strategy` 파라미터를 지정하여 각 에폭이 끝날 때 평가 지표를 확인할 수 있습니다: +미세 튜닝 중에 평가 지표를 모니터링하려면 훈련 인수에 `eval_strategy` 파라미터를 지정하여 각 에폭이 끝날 때 평가 지표를 확인할 수 있습니다: ```py >>> from transformers import TrainingArguments, Trainer ->>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch") +>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") ``` ### 훈련 하기[[trainer]] diff --git a/docs/source/pt/tasks/token_classification.md b/docs/source/pt/tasks/token_classification.md index 3465680dcc2046..d4d6bf4dd906ee 100644 --- a/docs/source/pt/tasks/token_classification.md +++ b/docs/source/pt/tasks/token_classification.md @@ -180,7 +180,7 @@ Nesse ponto, restam apenas três passos: ```py >>> training_args = TrainingArguments( ... output_dir="./results", -... evaluation_strategy="epoch", +... eval_strategy="epoch", ... learning_rate=2e-5, ... per_device_train_batch_size=16, ... per_device_eval_batch_size=16, diff --git a/docs/source/pt/training.md b/docs/source/pt/training.md index 49f57dead24233..67294baee35c1f 100644 --- a/docs/source/pt/training.md +++ b/docs/source/pt/training.md @@ -146,13 +146,13 @@ todos os modelos de 🤗 Transformers retornam logits). ... return metric.compute(predictions=predictions, references=labels) ``` -Se quiser controlar as suas métricas de avaliação durante o fine-tuning, especifique o parâmetro `evaluation_strategy` +Se quiser controlar as suas métricas de avaliação durante o fine-tuning, especifique o parâmetro `eval_strategy` nos seus argumentos de treinamento para que o modelo considere a métrica de avaliação ao final de cada época: ```py >>> from transformers import TrainingArguments ->>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch") +>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") ``` ### Trainer diff --git a/docs/source/zh/tasks/asr.md b/docs/source/zh/tasks/asr.md index 91fee0ab332ede..48ab94cb7d9503 100644 --- a/docs/source/zh/tasks/asr.md +++ b/docs/source/zh/tasks/asr.md @@ -288,7 +288,7 @@ Wav2Vec2 分词器仅训练了大写字符,因此您需要确保文本与分 ... gradient_checkpointing=True, ... fp16=True, ... group_by_length=True, -... evaluation_strategy="steps", +... eval_strategy="steps", ... per_device_eval_batch_size=8, ... save_steps=1000, ... eval_steps=1000, diff --git a/docs/source/zh/training.md b/docs/source/zh/training.md index 773c58181c31e9..aeacf732c22f42 100644 --- a/docs/source/zh/training.md +++ b/docs/source/zh/training.md @@ -125,12 +125,12 @@ rendered properly in your Markdown viewer. ... return metric.compute(predictions=predictions, references=labels) ``` -如果您希望在微调过程中监视评估指标,请在您的训练参数中指定 `evaluation_strategy` 参数,以在每个`epoch`结束时展示评估指标: +如果您希望在微调过程中监视评估指标,请在您的训练参数中指定 `eval_strategy` 参数,以在每个`epoch`结束时展示评估指标: ```py >>> from transformers import TrainingArguments, Trainer ->>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch") +>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") ``` ### 训练器 diff --git a/examples/flax/language-modeling/README.md b/examples/flax/language-modeling/README.md index cb8671147ff98c..324c560ea4a7f3 100644 --- a/examples/flax/language-modeling/README.md +++ b/examples/flax/language-modeling/README.md @@ -490,7 +490,7 @@ python3 xla_spawn.py --num_cores ${NUM_TPUS} run_mlm.py --output_dir="./runs" \ --do_train \ --do_eval \ --logging_steps="500" \ - --evaluation_strategy="epoch" \ + --eval_strategy="epoch" \ --report_to="tensorboard" \ --save_strategy="no" ``` @@ -538,7 +538,7 @@ python3 -m torch.distributed.launch --nproc_per_node ${NUM_GPUS} run_mlm.py \ --do_train \ --do_eval \ --logging_steps="500" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --report_to="tensorboard" \ --save_strategy="no" ``` diff --git a/examples/legacy/seq2seq/finetune.sh b/examples/legacy/seq2seq/finetune.sh index 1f518835d63859..60023df7bad6ae 100644 --- a/examples/legacy/seq2seq/finetune.sh +++ b/examples/legacy/seq2seq/finetune.sh @@ -18,7 +18,7 @@ python finetune_trainer.py \ --learning_rate=3e-5 \ --fp16 \ --do_train --do_eval --do_predict \ - --evaluation_strategy steps \ + --eval_strategy steps \ --predict_with_generate \ --n_val 1000 \ "$@" diff --git a/examples/legacy/seq2seq/finetune_tpu.sh b/examples/legacy/seq2seq/finetune_tpu.sh index 68cf0d77360292..ef72b0953b440b 100644 --- a/examples/legacy/seq2seq/finetune_tpu.sh +++ b/examples/legacy/seq2seq/finetune_tpu.sh @@ -20,7 +20,7 @@ python xla_spawn.py --num_cores $TPU_NUM_CORES \ finetune_trainer.py \ --learning_rate=3e-5 \ --do_train --do_eval \ - --evaluation_strategy steps \ + --eval_strategy steps \ --prediction_loss_only \ --n_val 1000 \ "$@" diff --git a/examples/legacy/seq2seq/finetune_trainer.py b/examples/legacy/seq2seq/finetune_trainer.py index 4e186c96d8c218..e269bc2474eca5 100755 --- a/examples/legacy/seq2seq/finetune_trainer.py +++ b/examples/legacy/seq2seq/finetune_trainer.py @@ -271,7 +271,7 @@ def main(): max_source_length=data_args.max_source_length, prefix=model.config.prefix or "", ) - if training_args.do_eval or training_args.evaluation_strategy != EvaluationStrategy.NO + if training_args.do_eval or training_args.eval_strategy != EvaluationStrategy.NO else None ) test_dataset = ( diff --git a/examples/legacy/seq2seq/train_distil_marian_enro.sh b/examples/legacy/seq2seq/train_distil_marian_enro.sh index fc1b90595c5e69..5e86a6991c579e 100644 --- a/examples/legacy/seq2seq/train_distil_marian_enro.sh +++ b/examples/legacy/seq2seq/train_distil_marian_enro.sh @@ -32,7 +32,7 @@ python finetune_trainer.py \ --max_source_length $MAX_LEN --max_target_length $MAX_LEN \ --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ --do_train --do_eval --do_predict \ - --evaluation_strategy steps \ + --eval_strategy steps \ --predict_with_generate --logging_first_step \ --task translation --label_smoothing_factor 0.1 \ "$@" diff --git a/examples/legacy/seq2seq/train_distil_marian_enro_tpu.sh b/examples/legacy/seq2seq/train_distil_marian_enro_tpu.sh index 2fce7684ab449d..00ef672261963b 100644 --- a/examples/legacy/seq2seq/train_distil_marian_enro_tpu.sh +++ b/examples/legacy/seq2seq/train_distil_marian_enro_tpu.sh @@ -33,7 +33,7 @@ python xla_spawn.py --num_cores $TPU_NUM_CORES \ --max_source_length $MAX_LEN --max_target_length $MAX_LEN \ --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ --do_train --do_eval \ - --evaluation_strategy steps \ + --eval_strategy steps \ --prediction_loss_only \ --task translation --label_smoothing_factor 0.1 \ "$@" diff --git a/examples/legacy/seq2seq/train_distilbart_cnn.sh b/examples/legacy/seq2seq/train_distilbart_cnn.sh index ec0aec8e597fb4..42f34e0cb6e75a 100644 --- a/examples/legacy/seq2seq/train_distilbart_cnn.sh +++ b/examples/legacy/seq2seq/train_distilbart_cnn.sh @@ -34,6 +34,6 @@ python finetune_trainer.py \ --logging_first_step \ --max_target_length 56 --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN\ --do_train --do_eval --do_predict \ - --evaluation_strategy steps \ + --eval_strategy steps \ --predict_with_generate --sortish_sampler \ "$@" diff --git a/examples/legacy/seq2seq/train_mbart_cc25_enro.sh b/examples/legacy/seq2seq/train_mbart_cc25_enro.sh index 2b603eda7c35e6..63c8051b47def1 100644 --- a/examples/legacy/seq2seq/train_mbart_cc25_enro.sh +++ b/examples/legacy/seq2seq/train_mbart_cc25_enro.sh @@ -29,7 +29,7 @@ python finetune_trainer.py \ --num_train_epochs 6 \ --save_steps 25000 --eval_steps 25000 --logging_steps 1000 \ --do_train --do_eval --do_predict \ - --evaluation_strategy steps \ + --eval_strategy steps \ --predict_with_generate --logging_first_step \ --task translation \ "$@" diff --git a/examples/pytorch/README.md b/examples/pytorch/README.md index 63a56a06e8d5a4..c2f89f2477e691 100644 --- a/examples/pytorch/README.md +++ b/examples/pytorch/README.md @@ -283,7 +283,7 @@ To enable Neptune logging, in your `TrainingArguments`, set the `report_to` argu ```python training_args = TrainingArguments( "quick-training-distilbert-mrpc", - evaluation_strategy="steps", + eval_strategy="steps", eval_steps=20, report_to="neptune", ) diff --git a/examples/pytorch/audio-classification/README.md b/examples/pytorch/audio-classification/README.md index cc669a0894e14d..bc4581089c3fd2 100644 --- a/examples/pytorch/audio-classification/README.md +++ b/examples/pytorch/audio-classification/README.md @@ -50,7 +50,7 @@ python run_audio_classification.py \ --dataloader_num_workers 4 \ --logging_strategy steps \ --logging_steps 10 \ - --evaluation_strategy epoch \ + --eval_strategy epoch \ --save_strategy epoch \ --load_best_model_at_end True \ --metric_for_best_model accuracy \ @@ -92,7 +92,7 @@ python run_audio_classification.py \ --dataloader_num_workers 8 \ --logging_strategy steps \ --logging_steps 10 \ - --evaluation_strategy epoch \ + --eval_strategy epoch \ --save_strategy epoch \ --load_best_model_at_end True \ --metric_for_best_model accuracy \ diff --git a/examples/pytorch/image-classification/README.md b/examples/pytorch/image-classification/README.md index 112cc51764a38e..62996ee19e375a 100644 --- a/examples/pytorch/image-classification/README.md +++ b/examples/pytorch/image-classification/README.md @@ -52,7 +52,7 @@ python run_image_classification.py \ --per_device_eval_batch_size 8 \ --logging_strategy steps \ --logging_steps 10 \ - --evaluation_strategy epoch \ + --eval_strategy epoch \ --save_strategy epoch \ --load_best_model_at_end True \ --save_total_limit 3 \ diff --git a/examples/pytorch/image-pretraining/README.md b/examples/pytorch/image-pretraining/README.md index 65bb863f38b6ce..88c71e643e4c24 100644 --- a/examples/pytorch/image-pretraining/README.md +++ b/examples/pytorch/image-pretraining/README.md @@ -56,7 +56,7 @@ Alternatively, one can decide to further pre-train an already pre-trained (or fi --per_device_eval_batch_size 8 \ --logging_strategy steps \ --logging_steps 10 \ - --evaluation_strategy epoch \ + --eval_strategy epoch \ --save_strategy epoch \ --load_best_model_at_end True \ --save_total_limit 3 \ @@ -106,7 +106,7 @@ Next, we can run the script by providing the path to this custom configuration ( --per_device_eval_batch_size 8 \ --logging_strategy steps \ --logging_steps 10 \ - --evaluation_strategy epoch \ + --eval_strategy epoch \ --save_strategy epoch \ --load_best_model_at_end True \ --save_total_limit 3 \ @@ -172,7 +172,7 @@ python run_mae.py \ --per_device_eval_batch_size 8 \ --logging_strategy steps \ --logging_steps 10 \ - --evaluation_strategy epoch \ + --eval_strategy epoch \ --save_strategy epoch \ --load_best_model_at_end True \ --save_total_limit 3 \ diff --git a/examples/pytorch/semantic-segmentation/README.md b/examples/pytorch/semantic-segmentation/README.md index a0f830e16e915c..0be42d4fe84483 100644 --- a/examples/pytorch/semantic-segmentation/README.md +++ b/examples/pytorch/semantic-segmentation/README.md @@ -118,7 +118,7 @@ python run_semantic_segmentation.py \ --per_device_eval_batch_size 8 \ --logging_strategy steps \ --logging_steps 100 \ - --evaluation_strategy epoch \ + --eval_strategy epoch \ --save_strategy epoch \ --seed 1337 ``` diff --git a/examples/pytorch/speech-recognition/README.md b/examples/pytorch/speech-recognition/README.md index 8dbfcafe3405f9..b9cab9513bd446 100644 --- a/examples/pytorch/speech-recognition/README.md +++ b/examples/pytorch/speech-recognition/README.md @@ -76,7 +76,7 @@ python run_speech_recognition_ctc.py \ --gradient_accumulation_steps="2" \ --learning_rate="3e-4" \ --warmup_steps="500" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="sentence" \ --length_column_name="input_length" \ --save_steps="400" \ @@ -111,7 +111,7 @@ torchrun \ --per_device_train_batch_size="4" \ --learning_rate="3e-4" \ --warmup_steps="500" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="sentence" \ --length_column_name="input_length" \ --save_steps="400" \ @@ -162,7 +162,7 @@ However, the `--shuffle_buffer_size` argument controls how many examples we can --gradient_accumulation_steps="2" \ --learning_rate="5e-4" \ --warmup_steps="500" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="sentence" \ --save_steps="500" \ --eval_steps="500" \ @@ -293,7 +293,7 @@ python run_speech_recognition_ctc.py \ --per_device_train_batch_size="32" \ --learning_rate="1e-3" \ --warmup_steps="100" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="sentence" \ --length_column_name="input_length" \ --save_steps="200" \ @@ -330,7 +330,7 @@ python run_speech_recognition_ctc.py \ --per_device_train_batch_size="32" \ --learning_rate="1e-3" \ --warmup_steps="100" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="sentence" \ --length_column_name="input_length" \ --save_steps="200" \ @@ -378,7 +378,7 @@ python run_speech_recognition_seq2seq.py \ --logging_steps="25" \ --learning_rate="1e-5" \ --warmup_steps="500" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --eval_steps="1000" \ --save_strategy="steps" \ --save_steps="1000" \ @@ -419,7 +419,7 @@ torchrun \ --logging_steps="25" \ --learning_rate="1e-5" \ --warmup_steps="500" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --eval_steps="1000" \ --save_strategy="steps" \ --save_steps="1000" \ @@ -547,7 +547,7 @@ python run_speech_recognition_seq2seq.py \ --gradient_accumulation_steps="8" \ --learning_rate="3e-4" \ --warmup_steps="400" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="text" \ --save_steps="400" \ --eval_steps="400" \ @@ -589,7 +589,7 @@ torchrun \ --gradient_accumulation_steps="1" \ --learning_rate="3e-4" \ --warmup_steps="400" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="text" \ --save_steps="400" \ --eval_steps="400" \ diff --git a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py index 927a15f9be679f..de06b988db634c 100644 --- a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py +++ b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py @@ -100,7 +100,7 @@ def tokenize(example): output_dir=args.output_dir, learning_rate=args.learning_rate, lr_scheduler_type=args.lr_scheduler_type, - evaluation_strategy="epoch", + eval_strategy="epoch", save_strategy="epoch", logging_strategy="epoch", per_device_train_batch_size=args.batch_size, diff --git a/examples/research_projects/layoutlmv3/README.md b/examples/research_projects/layoutlmv3/README.md index 17bf4bb67cd90f..2cc0fb75bd2c16 100644 --- a/examples/research_projects/layoutlmv3/README.md +++ b/examples/research_projects/layoutlmv3/README.md @@ -32,7 +32,7 @@ python run_funsd_cord.py \ --do_train \ --do_eval \ --max_steps 1000 \ - --evaluation_strategy steps \ + --eval_strategy steps \ --eval_steps 100 \ --learning_rate 1e-5 \ --load_best_model_at_end \ @@ -57,7 +57,7 @@ python run_funsd_cord.py \ --do_train \ --do_eval \ --max_steps 1000 \ - --evaluation_strategy steps \ + --eval_strategy steps \ --eval_steps 100 \ --learning_rate 5e-5 \ --load_best_model_at_end \ diff --git a/examples/research_projects/robust-speech-event/README.md b/examples/research_projects/robust-speech-event/README.md index 5c7bf42a00445a..ca3c5cdecdecea 100644 --- a/examples/research_projects/robust-speech-event/README.md +++ b/examples/research_projects/robust-speech-event/README.md @@ -362,7 +362,7 @@ echo '''python run_speech_recognition_ctc.py \ --per_device_train_batch_size="2" \ --learning_rate="3e-4" \ --save_total_limit="1" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="sentence" \ --length_column_name="input_length" \ --save_steps="5" \ @@ -438,7 +438,7 @@ echo '''python run_speech_recognition_ctc.py \ --learning_rate="7.5e-5" \ --warmup_steps="2000" \ --length_column_name="input_length" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --text_column_name="sentence" \ --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \ --save_steps="500" \ diff --git a/examples/research_projects/self-training-text-classification/README.md b/examples/research_projects/self-training-text-classification/README.md index 7e0f3f97148ee6..062d5de7afd057 100644 --- a/examples/research_projects/self-training-text-classification/README.md +++ b/examples/research_projects/self-training-text-classification/README.md @@ -51,7 +51,7 @@ parameters_dict = { 'train_file': os.path.join(data_dir, 'train.csv'), 'infer_file': os.path.join(data_dir, 'infer.csv'), 'eval_file': os.path.join(data_dir, 'eval.csv'), - 'evaluation_strategy': 'steps', + 'eval_strategy': 'steps', 'task_name': 'scitail', 'label_list': ['entails', 'neutral'], 'per_device_train_batch_size': 32, diff --git a/examples/research_projects/self-training-text-classification/finetuning.py b/examples/research_projects/self-training-text-classification/finetuning.py index eeb0a285dff987..0afff6a91eadca 100644 --- a/examples/research_projects/self-training-text-classification/finetuning.py +++ b/examples/research_projects/self-training-text-classification/finetuning.py @@ -190,7 +190,7 @@ class FTTrainingArguments: ) }, ) - evaluation_strategy: Optional[str] = dataclasses.field( + eval_strategy: Optional[str] = dataclasses.field( default="no", metadata={ "help": 'The evaluation strategy to adopt during training. Possible values are: ["no", "step", "epoch]' @@ -198,7 +198,7 @@ class FTTrainingArguments: ) eval_steps: Optional[int] = dataclasses.field( default=1, - metadata={"help": 'Number of update steps between two evaluations if `evaluation_strategy="steps"`.'}, + metadata={"help": 'Number of update steps between two evaluations if `eval_strategy="steps"`.'}, ) eval_metric: Optional[str] = dataclasses.field( default="accuracy", metadata={"help": "The evaluation metric used for the task."} @@ -265,7 +265,7 @@ def train(args, accelerator, model, tokenizer, train_dataloader, optimizer, lr_s # Evaluate during training if ( eval_dataloader is not None - and args.evaluation_strategy == IntervalStrategy.STEPS.value + and args.eval_strategy == IntervalStrategy.STEPS.value and args.eval_steps > 0 and completed_steps % args.eval_steps == 0 ): @@ -331,7 +331,7 @@ def train(args, accelerator, model, tokenizer, train_dataloader, optimizer, lr_s break # Evaluate during training - if eval_dataloader is not None and args.evaluation_strategy == IntervalStrategy.EPOCH.value: + if eval_dataloader is not None and args.eval_strategy == IntervalStrategy.EPOCH.value: accelerator.wait_for_everyone() new_checkpoint = f"checkpoint-{IntervalStrategy.EPOCH.value}-{epoch}" new_eval_result = evaluate(args, accelerator, eval_dataloader, "eval", model, new_checkpoint)[ @@ -571,7 +571,7 @@ def finetune(accelerator, model_name_or_path, train_file, output_dir, **kwargs): assert args.train_file is not None data_files[Split.TRAIN.value] = args.train_file - if args.do_eval or args.evaluation_strategy != IntervalStrategy.NO.value: + if args.do_eval or args.eval_strategy != IntervalStrategy.NO.value: assert args.eval_file is not None data_files[Split.EVAL.value] = args.eval_file diff --git a/examples/research_projects/self-training-text-classification/run.sh b/examples/research_projects/self-training-text-classification/run.sh index 435a41461801e6..34e91d7c127c89 100755 --- a/examples/research_projects/self-training-text-classification/run.sh +++ b/examples/research_projects/self-training-text-classification/run.sh @@ -60,7 +60,7 @@ parameters_dict = { 'train_file': os.path.join(data_dir, '${TRAIN_FILE}'), 'infer_file': os.path.join(data_dir, '${INFER_FILE}'), 'eval_file': os.path.join(data_dir, '${EVAL_FILE}'), - 'evaluation_strategy': 'steps', + 'eval_strategy': 'steps', 'task_name': 'scitail', 'label_list': ['entails', 'neutral'], 'per_device_train_batch_size': 32, diff --git a/examples/research_projects/self-training-text-classification/selftraining.py b/examples/research_projects/self-training-text-classification/selftraining.py index 70a6c2f319e0cb..d741225b061e88 100644 --- a/examples/research_projects/self-training-text-classification/selftraining.py +++ b/examples/research_projects/self-training-text-classification/selftraining.py @@ -79,7 +79,7 @@ class STTrainingArguments: eval_metric: Optional[str] = dataclasses.field( default="accuracy", metadata={"help": "The evaluation metric used for the task."} ) - evaluation_strategy: Optional[str] = dataclasses.field( + eval_strategy: Optional[str] = dataclasses.field( default="no", metadata={ "help": 'The evaluation strategy to adopt during training. Possible values are: ["no", "step", "epoch]' @@ -208,7 +208,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs): data_files["train"] = args.train_file data_files["infer"] = args.infer_file - if args.evaluation_strategy != IntervalStrategy.NO.value: + if args.eval_strategy != IntervalStrategy.NO.value: assert args.eval_file is not None data_files["eval"] = args.eval_file @@ -267,7 +267,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs): "label_list": args.label_list, "output_dir": current_output_dir, "eval_metric": args.eval_metric, - "evaluation_strategy": args.evaluation_strategy, + "eval_strategy": args.eval_strategy, "early_stopping_patience": args.early_stopping_patience, "early_stopping_threshold": args.early_stopping_threshold, "seed": args.seed, @@ -341,7 +341,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs): data_files["train_pseudo"] = os.path.join(next_data_dir, f"train_pseudo.{args.data_file_extension}") - if args.evaluation_strategy != IntervalStrategy.NO.value: + if args.eval_strategy != IntervalStrategy.NO.value: new_eval_result = eval_result if best_iteration is None: diff --git a/examples/research_projects/tapex/README.md b/examples/research_projects/tapex/README.md index 7d98901e281e65..b98eb9b428d01c 100644 --- a/examples/research_projects/tapex/README.md +++ b/examples/research_projects/tapex/README.md @@ -71,7 +71,7 @@ python run_wikisql_with_tapex.py \ --eval_steps 1000 \ --save_steps 1000 \ --warmup_steps 1000 \ - --evaluation_strategy steps \ + --eval_strategy steps \ --predict_with_generate \ --num_beams 5 \ --weight_decay 1e-2 \ @@ -101,7 +101,7 @@ python run_wikisql_with_tapex.py \ --eval_steps 1000 \ --save_steps 1000 \ --warmup_steps 1000 \ - --evaluation_strategy steps \ + --eval_strategy steps \ --predict_with_generate \ --num_beams 5 \ --weight_decay 1e-2 \ @@ -132,7 +132,7 @@ python run_wikitablequestions_with_tapex.py \ --eval_steps 1000 \ --save_steps 1000 \ --warmup_steps 1000 \ - --evaluation_strategy steps \ + --eval_strategy steps \ --predict_with_generate \ --num_beams 5 \ --weight_decay 1e-2 \ @@ -162,7 +162,7 @@ python run_wikitablequestions_with_tapex.py \ --eval_steps 1000 \ --save_steps 1000 \ --warmup_steps 1000 \ - --evaluation_strategy steps \ + --eval_strategy steps \ --predict_with_generate \ --num_beams 5 \ --weight_decay 1e-2 \ @@ -223,7 +223,7 @@ python run_tabfact_with_tapex.py \ --learning_rate 3e-5 \ --eval_steps 1000 \ --save_steps 1000 \ - --evaluation_strategy steps \ + --eval_strategy steps \ --weight_decay 1e-2 \ --max_steps 30000 \ --max_grad_norm 0.1 @@ -252,7 +252,7 @@ python run_tabfact_with_tapex.py \ --learning_rate 3e-5 \ --eval_steps 1000 \ --save_steps 1000 \ - --evaluation_strategy steps \ + --eval_strategy steps \ --weight_decay 1e-2 \ --max_steps 30000 \ --max_grad_norm 0.1 diff --git a/examples/research_projects/wav2vec2/FINE_TUNE_XLSR_WAV2VEC2.md b/examples/research_projects/wav2vec2/FINE_TUNE_XLSR_WAV2VEC2.md index 52553532fe08ab..7a580a36132441 100644 --- a/examples/research_projects/wav2vec2/FINE_TUNE_XLSR_WAV2VEC2.md +++ b/examples/research_projects/wav2vec2/FINE_TUNE_XLSR_WAV2VEC2.md @@ -182,7 +182,7 @@ Here we will run the script on the *Turkish* Common Voice dataset for demonstrat --per_device_train_batch_size="16" \ --learning_rate="3e-4" \ --warmup_steps="500" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --save_steps="400" \ --eval_steps="400" \ --logging_steps="400" \ @@ -209,7 +209,7 @@ Here we will run the script on the *Turkish* Common Voice dataset for demonstrat --per_device_train_batch_size="16" \ --learning_rate="3e-4" \ --warmup_steps="500" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --save_steps="400" \ --eval_steps="400" \ --logging_steps="400" \ diff --git a/examples/research_projects/wav2vec2/README.md b/examples/research_projects/wav2vec2/README.md index cc667d6567ff95..88f62778a3add9 100644 --- a/examples/research_projects/wav2vec2/README.md +++ b/examples/research_projects/wav2vec2/README.md @@ -18,7 +18,7 @@ python run_asr.py \ --num_train_epochs="30" \ --per_device_train_batch_size="20" \ --per_device_eval_batch_size="20" \ ---evaluation_strategy="steps" \ +--eval_strategy="steps" \ --save_steps="500" \ --eval_steps="100" \ --logging_steps="50" \ @@ -73,7 +73,7 @@ python run_asr.py \ --per_device_train_batch_size="1" \ --per_device_eval_batch_size="1" \ --gradient_accumulation_steps="8" \ ---evaluation_strategy="steps" \ +--eval_strategy="steps" \ --save_steps="500" \ --eval_steps="100" \ --logging_steps="50" \ @@ -152,7 +152,7 @@ ZeRO-2: PYTHONPATH=../../../src deepspeed --num_gpus 2 \ run_asr.py \ --output_dir=output_dir --num_train_epochs=2 --per_device_train_batch_size=2 \ ---per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \ +--per_device_eval_batch_size=2 --eval_strategy=steps --save_steps=500 --eval_steps=100 \ --logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \ --model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \ --dataset_name=hf-internal-testing/librispeech_asr_dummy --dataset_config_name=clean \ @@ -176,7 +176,7 @@ ZeRO-3: PYTHONPATH=../../../src deepspeed --num_gpus 2 \ run_asr.py \ --output_dir=output_dir --num_train_epochs=2 --per_device_train_batch_size=2 \ ---per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \ +--per_device_eval_batch_size=2 --eval_strategy=steps --save_steps=500 --eval_steps=100 \ --logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \ --model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \ --dataset_name=hf-internal-testing/librispeech_asr_dummy --dataset_config_name=clean \ diff --git a/examples/research_projects/wav2vec2/finetune_base_100.sh b/examples/research_projects/wav2vec2/finetune_base_100.sh index 8002dd81235f9e..254b0afef3d62e 100755 --- a/examples/research_projects/wav2vec2/finetune_base_100.sh +++ b/examples/research_projects/wav2vec2/finetune_base_100.sh @@ -4,7 +4,7 @@ python run_asr.py \ --num_train_epochs="30" \ --per_device_train_batch_size="32" \ --per_device_eval_batch_size="32" \ ---evaluation_strategy="steps" \ +--eval_strategy="steps" \ --save_total_limit="3" \ --save_steps="500" \ --eval_steps="100" \ diff --git a/examples/research_projects/wav2vec2/finetune_base_timit_asr.sh b/examples/research_projects/wav2vec2/finetune_base_timit_asr.sh index 6219e26b642f63..508cb532b0f08d 100755 --- a/examples/research_projects/wav2vec2/finetune_base_timit_asr.sh +++ b/examples/research_projects/wav2vec2/finetune_base_timit_asr.sh @@ -4,7 +4,7 @@ python run_asr.py \ --num_train_epochs="30" \ --per_device_train_batch_size="20" \ --per_device_eval_batch_size="20" \ ---evaluation_strategy="steps" \ +--eval_strategy="steps" \ --save_steps="500" \ --eval_steps="100" \ --logging_steps="50" \ diff --git a/examples/research_projects/wav2vec2/finetune_large_lv60_100.sh b/examples/research_projects/wav2vec2/finetune_large_lv60_100.sh index 3d2423df970c8e..6956b093e72530 100755 --- a/examples/research_projects/wav2vec2/finetune_large_lv60_100.sh +++ b/examples/research_projects/wav2vec2/finetune_large_lv60_100.sh @@ -4,7 +4,7 @@ python run_asr.py \ --num_train_epochs="30" \ --per_device_train_batch_size="16" \ --per_device_eval_batch_size="16" \ ---evaluation_strategy="steps" \ +--eval_strategy="steps" \ --save_total_limit="3" \ --save_steps="500" \ --eval_steps="100" \ diff --git a/examples/research_projects/wav2vec2/finetune_large_lv60_timit_asr.sh b/examples/research_projects/wav2vec2/finetune_large_lv60_timit_asr.sh index eb9671d015271e..fa02e71ea82c68 100755 --- a/examples/research_projects/wav2vec2/finetune_large_lv60_timit_asr.sh +++ b/examples/research_projects/wav2vec2/finetune_large_lv60_timit_asr.sh @@ -5,7 +5,7 @@ python run_asr.py \ --per_device_train_batch_size="2" \ --per_device_eval_batch_size="2" \ --gradient_accumulation_steps="4" \ ---evaluation_strategy="steps" \ +--eval_strategy="steps" \ --save_steps="500" \ --eval_steps="100" \ --logging_steps="50" \ diff --git a/examples/research_projects/wav2vec2/finetune_large_xlsr_53_arabic_speech_corpus.sh b/examples/research_projects/wav2vec2/finetune_large_xlsr_53_arabic_speech_corpus.sh index 9b325c42771e64..e90bc8caa6c001 100755 --- a/examples/research_projects/wav2vec2/finetune_large_xlsr_53_arabic_speech_corpus.sh +++ b/examples/research_projects/wav2vec2/finetune_large_xlsr_53_arabic_speech_corpus.sh @@ -5,7 +5,7 @@ python run_asr.py \ --per_device_train_batch_size="1" \ --per_device_eval_batch_size="1" \ --gradient_accumulation_steps="8" \ ---evaluation_strategy="steps" \ +--eval_strategy="steps" \ --save_steps="500" \ --eval_steps="100" \ --logging_steps="50" \ diff --git a/examples/research_projects/wav2vec2/finetune_wav2vec2_xlsr_turkish.sh b/examples/research_projects/wav2vec2/finetune_wav2vec2_xlsr_turkish.sh index 0726bb09eb51e2..70da0e0a0d1219 100644 --- a/examples/research_projects/wav2vec2/finetune_wav2vec2_xlsr_turkish.sh +++ b/examples/research_projects/wav2vec2/finetune_wav2vec2_xlsr_turkish.sh @@ -6,7 +6,7 @@ python run_common_voice.py \ --overwrite_output_dir \ --num_train_epochs="5" \ --per_device_train_batch_size="16" \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --learning_rate="3e-4" \ --warmup_steps="500" \ --fp16 \ diff --git a/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py b/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py index d44145f3e0c12f..8fb2df71112594 100644 --- a/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py +++ b/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py @@ -161,7 +161,7 @@ def run_trainer( --num_train_epochs {str(num_train_epochs)} --per_device_train_batch_size 2 --per_device_eval_batch_size 2 - --evaluation_strategy steps + --eval_strategy steps --learning_rate 5e-4 --warmup_steps 8 --orthography timit diff --git a/examples/research_projects/xtreme-s/README.md b/examples/research_projects/xtreme-s/README.md index dc7e783c75d124..5314ba9880ad35 100644 --- a/examples/research_projects/xtreme-s/README.md +++ b/examples/research_projects/xtreme-s/README.md @@ -90,7 +90,7 @@ python -m torch.distributed.launch \ --gradient_accumulation_steps=2 \ --learning_rate="3e-4" \ --warmup_steps=3000 \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --max_duration_in_seconds=20 \ --save_steps=500 \ --eval_steps=500 \ @@ -134,7 +134,7 @@ python -m torch.distributed.launch \ --gradient_accumulation_steps=1 \ --learning_rate="3e-4" \ --warmup_steps=1500 \ - --evaluation_strategy="steps" \ + --eval_strategy="steps" \ --max_duration_in_seconds=30 \ --save_steps=200 \ --eval_steps=200 \ diff --git a/examples/tensorflow/image-classification/README.md b/examples/tensorflow/image-classification/README.md index 96979330ddc5b5..a343b443ef1ae5 100644 --- a/examples/tensorflow/image-classification/README.md +++ b/examples/tensorflow/image-classification/README.md @@ -45,7 +45,7 @@ python run_image_classification.py \ --per_device_eval_batch_size 8 \ --logging_strategy steps \ --logging_steps 10 \ - --evaluation_strategy epoch \ + --eval_strategy epoch \ --save_strategy epoch \ --load_best_model_at_end True \ --save_total_limit 3 \ diff --git a/src/transformers/integrations/integration_utils.py b/src/transformers/integrations/integration_utils.py index 00074a9574b548..63b9e050d4a1d3 100644 --- a/src/transformers/integrations/integration_utils.py +++ b/src/transformers/integrations/integration_utils.py @@ -320,13 +320,13 @@ def _objective(trial: dict, local_trainer): # Check for `do_eval` and `eval_during_training` for schedulers that require intermediate reporting. if isinstance( kwargs["scheduler"], (ASHAScheduler, MedianStoppingRule, HyperBandForBOHB, PopulationBasedTraining) - ) and (not trainer.args.do_eval or trainer.args.evaluation_strategy == IntervalStrategy.NO): + ) and (not trainer.args.do_eval or trainer.args.eval_strategy == IntervalStrategy.NO): raise RuntimeError( "You are using {cls} as a scheduler but you haven't enabled evaluation during training. " "This means your trials will not report intermediate results to Ray Tune, and " "can thus not be stopped early or used to exploit other trials parameters. " "If this is what you want, do not use {cls}. If you would like to use {cls}, " - "make sure you pass `do_eval=True` and `evaluation_strategy='steps'` in the " + "make sure you pass `do_eval=True` and `eval_strategy='steps'` in the " "Trainer `args`.".format(cls=type(kwargs["scheduler"]).__name__) ) diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index 1e3b0e587a74c6..b8e9aaf6fe55ec 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -443,7 +443,7 @@ def on_step_end(self, args: TrainingArguments, state: TrainerState, control: Tra # Evaluate if ( - args.evaluation_strategy == IntervalStrategy.STEPS + args.eval_strategy == IntervalStrategy.STEPS and state.global_step % state.eval_steps == 0 and args.eval_delay <= state.global_step ): @@ -469,7 +469,7 @@ def on_epoch_end(self, args: TrainingArguments, state: TrainerState, control: Tr control.should_log = True # Evaluate - if args.evaluation_strategy == IntervalStrategy.EPOCH and args.eval_delay <= state.epoch: + if args.eval_strategy == IntervalStrategy.EPOCH and args.eval_delay <= state.epoch: control.should_evaluate = True # Save @@ -580,7 +580,7 @@ def on_train_begin(self, args, state, control, **kwargs): args.metric_for_best_model is not None ), "EarlyStoppingCallback requires metric_for_best_model is defined" assert ( - args.evaluation_strategy != IntervalStrategy.NO + args.eval_strategy != IntervalStrategy.NO ), "EarlyStoppingCallback requires IntervalStrategy of steps or epoch" def on_evaluate(self, args, state, control, metrics, **kwargs): diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index cdf6325c4b4ae6..57061182757030 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -195,7 +195,7 @@ class TrainingArguments: by your training/evaluation scripts instead. See the [example scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details. do_eval (`bool`, *optional*): - Whether to run evaluation on the validation set or not. Will be set to `True` if `evaluation_strategy` is + Whether to run evaluation on the validation set or not. Will be set to `True` if `eval_strategy` is different from `"no"`. This argument is not directly used by [`Trainer`], it's intended to be used by your training/evaluation scripts instead. See the [example scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details. @@ -203,7 +203,7 @@ class TrainingArguments: Whether to run predictions on the test set or not. This argument is not directly used by [`Trainer`], it's intended to be used by your training/evaluation scripts instead. See the [example scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details. - evaluation_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`): + eval_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`): The evaluation strategy to adopt during training. Possible values are: - `"no"`: No evaluation is done during training. @@ -232,7 +232,7 @@ class TrainingArguments: requires more memory). eval_delay (`float`, *optional*): Number of epochs or steps to wait for before the first evaluation can be performed, depending on the - evaluation_strategy. + eval_strategy. learning_rate (`float`, *optional*, defaults to 5e-5): The initial learning rate for [`AdamW`] optimizer. weight_decay (`float`, *optional*, defaults to 0): @@ -375,7 +375,7 @@ class TrainingArguments: Whether to drop the last incomplete batch (if the length of the dataset is not divisible by the batch size) or not. eval_steps (`int` or `float`, *optional*): - Number of update steps between two evaluations if `evaluation_strategy="steps"`. Will default to the same + Number of update steps between two evaluations if `eval_strategy="steps"`. Will default to the same value as `logging_steps` if not set. Should be an integer or a float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps. dataloader_num_workers (`int`, *optional*, defaults to 0): @@ -409,7 +409,7 @@ class TrainingArguments: - When set to `True`, the parameters `save_strategy` needs to be the same as `evaluation_strategy`, and in + When set to `True`, the parameters `save_strategy` needs to be the same as `eval_strategy`, and in the case it is "steps", `save_steps` must be a round multiple of `eval_steps`. @@ -733,7 +733,7 @@ class TrainingArguments: do_train: bool = field(default=False, metadata={"help": "Whether to run training."}) do_eval: bool = field(default=False, metadata={"help": "Whether to run eval on the dev set."}) do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."}) - evaluation_strategy: Union[IntervalStrategy, str] = field( + eval_strategy: Union[IntervalStrategy, str] = field( default="no", metadata={"help": "The evaluation strategy to use."}, ) @@ -782,7 +782,7 @@ class TrainingArguments: metadata={ "help": ( "Number of epochs or steps to wait for before the first evaluation can be performed, depending on the" - " evaluation_strategy." + " eval_strategy." ) }, ) @@ -1269,6 +1269,10 @@ class TrainingArguments: "choices": ["auto", "apex", "cpu_amp"], }, ) + evaluation_strategy: Union[IntervalStrategy, str] = field( + default=None, + metadata={"help": "Deprecated. Use `eval_strategy` instead"}, + ) push_to_hub_model_id: Optional[str] = field( default=None, metadata={"help": "The name of the repository to which push the `Trainer`."} ) @@ -1393,14 +1397,21 @@ def __post_init__(self): if self.disable_tqdm is None: self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN - if isinstance(self.evaluation_strategy, EvaluationStrategy): + if self.evaluation_strategy is not None: + warnings.warn( + "`evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead", + FutureWarning, + ) + self.eval_strategy = self.evaluation_strategy + + if isinstance(self.eval_strategy, EvaluationStrategy): warnings.warn( - "using `EvaluationStrategy` for `evaluation_strategy` is deprecated and will be removed in version 5" + "using `EvaluationStrategy` for `eval_strategy` is deprecated and will be removed in version 5" " of 🤗 Transformers. Use `IntervalStrategy` instead", FutureWarning, ) # Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it. - self.evaluation_strategy = self.evaluation_strategy.value + self.eval_strategy = self.eval_strategy.value if self.no_cuda: warnings.warn( "using `no_cuda` is deprecated and will be removed in version 5.0 of 🤗 Transformers. " @@ -1409,23 +1420,23 @@ def __post_init__(self): ) self.use_cpu = self.no_cuda - self.evaluation_strategy = IntervalStrategy(self.evaluation_strategy) + self.eval_strategy = IntervalStrategy(self.eval_strategy) self.logging_strategy = IntervalStrategy(self.logging_strategy) self.save_strategy = IntervalStrategy(self.save_strategy) self.hub_strategy = HubStrategy(self.hub_strategy) self.lr_scheduler_type = SchedulerType(self.lr_scheduler_type) - if self.do_eval is False and self.evaluation_strategy != IntervalStrategy.NO: + if self.do_eval is False and self.eval_strategy != IntervalStrategy.NO: self.do_eval = True # eval_steps has to be defined and non-zero, fallbacks to logging_steps if the latter is non-zero - if self.evaluation_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0): + if self.eval_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0): if self.logging_steps > 0: logger.info(f"using `logging_steps` to initialize `eval_steps` to {self.logging_steps}") self.eval_steps = self.logging_steps else: raise ValueError( - f"evaluation strategy {self.evaluation_strategy} requires either non-zero --eval_steps or" + f"evaluation strategy {self.eval_strategy} requires either non-zero --eval_steps or" " --logging_steps" ) @@ -1437,7 +1448,7 @@ def __post_init__(self): if self.logging_steps != int(self.logging_steps): raise ValueError(f"--logging_steps must be an integer if bigger than 1: {self.logging_steps}") self.logging_steps = int(self.logging_steps) - if self.evaluation_strategy == IntervalStrategy.STEPS and self.eval_steps > 1: + if self.eval_strategy == IntervalStrategy.STEPS and self.eval_steps > 1: if self.eval_steps != int(self.eval_steps): raise ValueError(f"--eval_steps must be an integer if bigger than 1: {self.eval_steps}") self.eval_steps = int(self.eval_steps) @@ -1448,12 +1459,12 @@ def __post_init__(self): # Sanity checks for load_best_model_at_end: we require save and eval strategies to be compatible. if self.load_best_model_at_end: - if self.evaluation_strategy != self.save_strategy: + if self.eval_strategy != self.save_strategy: raise ValueError( "--load_best_model_at_end requires the save and eval strategy to match, but found\n- Evaluation " - f"strategy: {self.evaluation_strategy}\n- Save strategy: {self.save_strategy}" + f"strategy: {self.eval_strategy}\n- Save strategy: {self.save_strategy}" ) - if self.evaluation_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0: + if self.eval_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0: if self.eval_steps < 1 or self.save_steps < 1: if not (self.eval_steps < 1 and self.save_steps < 1): raise ValueError( @@ -1531,7 +1542,7 @@ def __post_init__(self): raise ValueError(" `--half_precision_backend apex`: GPU bf16 is not supported by apex.") if self.lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU: - if self.evaluation_strategy == IntervalStrategy.NO: + if self.eval_strategy == IntervalStrategy.NO: raise ValueError("lr_scheduler_type reduce_lr_on_plateau requires an eval strategy") if not is_torch_available(): raise ValueError("lr_scheduler_type reduce_lr_on_plateau requires torch>=0.2.0") @@ -2388,7 +2399,7 @@ def set_evaluate( but requires more memory). delay (`float`, *optional*): Number of epochs or steps to wait for before the first evaluation can be performed, depending on the - evaluation_strategy. + eval_strategy. loss_only (`bool`, *optional*, defaults to `False`): Ignores all outputs except the loss. jit_mode (`bool`, *optional*): @@ -2405,10 +2416,10 @@ def set_evaluate( 100 ``` """ - self.evaluation_strategy = IntervalStrategy(strategy) - if self.evaluation_strategy == IntervalStrategy.STEPS and steps == 0: + self.eval_strategy = IntervalStrategy(strategy) + if self.eval_strategy == IntervalStrategy.STEPS and steps == 0: raise ValueError("Setting `strategy` as 'steps' requires a positive value for `steps`.") - self.do_eval = self.evaluation_strategy != IntervalStrategy.NO + self.do_eval = self.eval_strategy != IntervalStrategy.NO self.eval_steps = steps self.per_device_eval_batch_size = batch_size self.eval_accumulation_steps = accumulation_steps diff --git a/src/transformers/training_args_tf.py b/src/transformers/training_args_tf.py index 4498f4cb793b92..12a6c5afe926bf 100644 --- a/src/transformers/training_args_tf.py +++ b/src/transformers/training_args_tf.py @@ -49,7 +49,7 @@ class TFTrainingArguments(TrainingArguments): by your training/evaluation scripts instead. See the [example scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details. do_eval (`bool`, *optional*): - Whether to run evaluation on the validation set or not. Will be set to `True` if `evaluation_strategy` is + Whether to run evaluation on the validation set or not. Will be set to `True` if `eval_strategy` is different from `"no"`. This argument is not directly used by [`Trainer`], it's intended to be used by your training/evaluation scripts instead. See the [example scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details. @@ -57,7 +57,7 @@ class TFTrainingArguments(TrainingArguments): Whether to run predictions on the test set or not. This argument is not directly used by [`Trainer`], it's intended to be used by your training/evaluation scripts instead. See the [example scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details. - evaluation_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`): + eval_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`): The evaluation strategy to adopt during training. Possible values are: - `"no"`: No evaluation is done during training. diff --git a/src/transformers/utils/notebook.py b/src/transformers/utils/notebook.py index f7396642732e58..9704aca242a60f 100644 --- a/src/transformers/utils/notebook.py +++ b/src/transformers/utils/notebook.py @@ -292,11 +292,11 @@ def __init__(self): self._force_next_update = False def on_train_begin(self, args, state, control, **kwargs): - self.first_column = "Epoch" if args.evaluation_strategy == IntervalStrategy.EPOCH else "Step" + self.first_column = "Epoch" if args.eval_strategy == IntervalStrategy.EPOCH else "Step" self.training_loss = 0 self.last_log = 0 column_names = [self.first_column] + ["Training Loss"] - if args.evaluation_strategy != IntervalStrategy.NO: + if args.eval_strategy != IntervalStrategy.NO: column_names.append("Validation Loss") self.training_tracker = NotebookTrainingTracker(state.max_steps, column_names) @@ -328,7 +328,7 @@ def on_predict(self, args, state, control, **kwargs): def on_log(self, args, state, control, logs=None, **kwargs): # Only for when there is no evaluation - if args.evaluation_strategy == IntervalStrategy.NO and "loss" in logs: + if args.eval_strategy == IntervalStrategy.NO and "loss" in logs: values = {"Training Loss": logs["loss"]} # First column is necessarily Step sine we're not in epoch eval strategy values["Step"] = state.global_step diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py index 81308d32c6cf22..f9b74783600b5a 100644 --- a/tests/deepspeed/test_deepspeed.py +++ b/tests/deepspeed/test_deepspeed.py @@ -959,7 +959,7 @@ def test_load_best_model(self, stage, dtype): "do_train": True, "do_eval": True, "optim": "adafactor", - "evaluation_strategy": "steps", + "eval_strategy": "steps", "eval_steps": 1, "save_strategy": "steps", "save_steps": 1, diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index 5c33eb2d9ed750..4bda892162fdad 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -308,7 +308,7 @@ def run_trainer( --per_device_eval_batch_size 4 --max_eval_samples 8 --val_max_target_length {max_len} - --evaluation_strategy steps + --eval_strategy steps --eval_steps {str(eval_steps)} """.split() diff --git a/tests/fsdp/test_fsdp.py b/tests/fsdp/test_fsdp.py index aeb232fd9e8e0b..71293f1601adde 100644 --- a/tests/fsdp/test_fsdp.py +++ b/tests/fsdp/test_fsdp.py @@ -308,6 +308,6 @@ def get_base_args(self, output_dir, num_epochs, logging_steps): --logging_steps {logging_steps} --save_strategy epoch --do_eval - --evaluation_strategy epoch + --eval_strategy epoch --report_to none """ diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 483022a09e6fab..2cbb59f235fe30 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -740,7 +740,7 @@ def test_reduce_lr_on_plateau_args(self): eval_dataset = RegressionDataset(length=64) args = TrainingArguments( "./regression", - evaluation_strategy="epoch", + eval_strategy="epoch", metric_for_best_model="eval_loss", ) model = RegressionModel() @@ -772,7 +772,7 @@ def log(self, logs): args = TrainingArguments( "./regression", lr_scheduler_type="reduce_lr_on_plateau", - evaluation_strategy="epoch", + eval_strategy="epoch", metric_for_best_model="eval_loss", num_train_epochs=10, learning_rate=0.2, @@ -2210,7 +2210,7 @@ def test_load_best_model_at_end(self): output_dir=tmpdir, learning_rate=0.1, eval_steps=5, - evaluation_strategy="steps", + eval_strategy="steps", save_steps=5, load_best_model_at_end=True, ) @@ -2226,7 +2226,7 @@ def test_load_best_model_at_end(self): output_dir=tmpdir, learning_rate=0.1, eval_steps=5, - evaluation_strategy="steps", + eval_strategy="steps", save_steps=5, load_best_model_at_end=True, metric_for_best_model="accuracy", @@ -2243,7 +2243,7 @@ def test_load_best_model_at_end(self): b=2.5, output_dir=tmpdir, learning_rate=0.1, - evaluation_strategy="epoch", + eval_strategy="epoch", save_strategy="epoch", load_best_model_at_end=True, metric_for_best_model="accuracy", @@ -2262,7 +2262,7 @@ def test_load_best_model_at_end(self): output_dir=tmpdir, learning_rate=0.1, eval_steps=5, - evaluation_strategy="steps", + eval_strategy="steps", save_steps=5, load_best_model_at_end=True, pretrained=False, @@ -2283,7 +2283,7 @@ def test_load_best_model_from_safetensors(self): output_dir=tmpdir, learning_rate=0.1, eval_steps=5, - evaluation_strategy="steps", + eval_strategy="steps", save_steps=5, load_best_model_at_end=True, save_safetensors=save_safetensors, @@ -2437,7 +2437,7 @@ def test_early_stopping_callback(self): gradient_accumulation_steps=1, per_device_train_batch_size=16, load_best_model_at_end=True, - evaluation_strategy=IntervalStrategy.EPOCH, + eval_strategy=IntervalStrategy.EPOCH, save_strategy=IntervalStrategy.EPOCH, compute_metrics=AlmostAccuracy(), metric_for_best_model="accuracy", @@ -2453,7 +2453,7 @@ def test_early_stopping_callback(self): num_train_epochs=20, gradient_accumulation_steps=1, per_device_train_batch_size=16, - evaluation_strategy=IntervalStrategy.EPOCH, + eval_strategy=IntervalStrategy.EPOCH, compute_metrics=AlmostAccuracy(), metric_for_best_model="accuracy", ) @@ -2497,7 +2497,7 @@ def test_checkpoint_rotation(self): # With best model at end trainer = get_regression_trainer( - output_dir=tmp_dir, evaluation_strategy="steps", load_best_model_at_end=True, save_total_limit=2 + output_dir=tmp_dir, eval_strategy="steps", load_best_model_at_end=True, save_total_limit=2 ) trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5") self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25]) @@ -2505,7 +2505,7 @@ def test_checkpoint_rotation(self): # Edge case: we don't always honor save_total_limit=1 if load_best_model_at_end=True to be able to resume # from checkpoint trainer = get_regression_trainer( - output_dir=tmp_dir, evaluation_strategy="steps", load_best_model_at_end=True, save_total_limit=1 + output_dir=tmp_dir, eval_strategy="steps", load_best_model_at_end=True, save_total_limit=1 ) trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-25") self.check_checkpoint_deletion(trainer, tmp_dir, [25]) @@ -3312,7 +3312,7 @@ def hp_name(trial): output_dir=tmp_dir, learning_rate=0.1, logging_steps=1, - evaluation_strategy=IntervalStrategy.EPOCH, + eval_strategy=IntervalStrategy.EPOCH, save_strategy=IntervalStrategy.EPOCH, num_train_epochs=4, disable_tqdm=True, @@ -3361,7 +3361,7 @@ def compute_objective(metrics: Dict[str, float]) -> List[float]: output_dir=tmp_dir, learning_rate=0.1, logging_steps=1, - evaluation_strategy=IntervalStrategy.EPOCH, + eval_strategy=IntervalStrategy.EPOCH, save_strategy=IntervalStrategy.EPOCH, num_train_epochs=10, disable_tqdm=True, @@ -3419,7 +3419,7 @@ def hp_name(params): output_dir=tmp_dir, learning_rate=0.1, logging_steps=1, - evaluation_strategy=IntervalStrategy.EPOCH, + eval_strategy=IntervalStrategy.EPOCH, save_strategy=IntervalStrategy.EPOCH, num_train_epochs=4, disable_tqdm=True, @@ -3482,7 +3482,7 @@ def hp_name(trial): output_dir=tmp_dir, learning_rate=0.1, logging_steps=1, - evaluation_strategy=IntervalStrategy.EPOCH, + eval_strategy=IntervalStrategy.EPOCH, save_strategy=IntervalStrategy.EPOCH, num_train_epochs=4, disable_tqdm=True, @@ -3902,7 +3902,7 @@ def hp_name(params): output_dir=tmp_dir, learning_rate=0.1, logging_steps=1, - evaluation_strategy=IntervalStrategy.EPOCH, + eval_strategy=IntervalStrategy.EPOCH, save_strategy=IntervalStrategy.EPOCH, num_train_epochs=4, disable_tqdm=True, diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py index 8e851132c2daab..b712edca385c25 100644 --- a/tests/trainer/test_trainer_callback.py +++ b/tests/trainer/test_trainer_callback.py @@ -133,12 +133,12 @@ def get_expected_events(self, trainer): expected_events += ["on_step_begin", "on_step_end"] if step % trainer.args.logging_steps == 0: expected_events.append("on_log") - if trainer.args.evaluation_strategy == IntervalStrategy.STEPS and step % trainer.args.eval_steps == 0: + if trainer.args.eval_strategy == IntervalStrategy.STEPS and step % trainer.args.eval_steps == 0: expected_events += evaluation_events.copy() if step % trainer.args.save_steps == 0: expected_events.append("on_save") expected_events.append("on_epoch_end") - if trainer.args.evaluation_strategy == IntervalStrategy.EPOCH: + if trainer.args.eval_strategy == IntervalStrategy.EPOCH: expected_events += evaluation_events.copy() expected_events += ["on_log", "on_train_end"] return expected_events @@ -215,12 +215,12 @@ def test_event_flow(self): events = trainer.callback_handler.callbacks[-2].events self.assertEqual(events, self.get_expected_events(trainer)) - trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_steps=5, evaluation_strategy="steps") + trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_steps=5, eval_strategy="steps") trainer.train() events = trainer.callback_handler.callbacks[-2].events self.assertEqual(events, self.get_expected_events(trainer)) - trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], evaluation_strategy="epoch") + trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_strategy="epoch") trainer.train() events = trainer.callback_handler.callbacks[-2].events self.assertEqual(events, self.get_expected_events(trainer)) @@ -231,7 +231,7 @@ def test_event_flow(self): logging_steps=3, save_steps=10, eval_steps=5, - evaluation_strategy="steps", + eval_strategy="steps", ) trainer.train() events = trainer.callback_handler.callbacks[-2].events diff --git a/tests/trainer/test_trainer_seq2seq.py b/tests/trainer/test_trainer_seq2seq.py index 5520d07c5a5ffc..d8722c67836f26 100644 --- a/tests/trainer/test_trainer_seq2seq.py +++ b/tests/trainer/test_trainer_seq2seq.py @@ -113,7 +113,7 @@ def _compute_metrics(pred): per_device_train_batch_size=batch_size, per_device_eval_batch_size=batch_size, predict_with_generate=True, - evaluation_strategy="steps", + eval_strategy="steps", do_train=True, do_eval=True, warmup_steps=0,