Skip to content

Commit

Permalink
Skip flaky lion8b test (#598)
Browse files: browse the repository at this point in the history
* relax atol and add retries to reduce flakiness in lion8b timing test
  • Loading branch information
dblalock authored and bmosaicml committed Sep 18, 2023
1 parent c9dda15 commit dbf5535
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 218 deletions.
4 changes: 3 additions & 1 deletion llmfoundry/utils/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import torch
from composer import algorithms
from composer.callbacks import (EarlyStopper, LRMonitor, MemoryMonitor,
OptimizerMonitor, RuntimeEstimator,
OptimizerMonitor, RuntimeEstimator, EvalOutputLogging,
SpeedMonitor)
from composer.core import Algorithm, Callback, Evaluator
from composer.datasets.in_context_learning_evaluation import \
Expand Down Expand Up @@ -101,6 +101,8 @@ def build_callback(name: str, kwargs: Dict[str, Any]) -> Callback:
return EarlyStopper(**kwargs)
elif name == 'hf_checkpointer':
return HuggingFaceCheckpointer(**kwargs)
elif name == 'eval_output_logging':
return EvalOutputLogging(**kwargs)
else:
raise ValueError(f'Not sure how to build callback: {name}')

Expand Down
1 change: 1 addition & 0 deletions mcli/mcli-hf-eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ integrations:
ssh_clone: false # Should be true if using a private repo

command: |
  pip install git+https://github.com/bmosaicml/composer.git@error_logging_callback
  cd llm-foundry/scripts
  composer eval/eval.py /mnt/config/parameters.yaml
Expand Down
22 changes: 18 additions & 4 deletions scripts/eval/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import time
import warnings
from typing import Any, Dict, List, Optional, Union
from composer.core.callback import Callback

import pandas as pd
import torch
Expand All @@ -21,7 +22,7 @@

from llmfoundry.models import MPTForCausalLM
from llmfoundry.models.model_registry import COMPOSER_MODEL_REGISTRY
from llmfoundry.utils.builders import (build_icl_data_and_gauntlet,
from llmfoundry.utils.builders import (build_icl_data_and_gauntlet, build_callback,
build_logger, build_tokenizer)
from llmfoundry.utils.config_utils import pop_config, process_init_device

Expand Down Expand Up @@ -106,6 +107,7 @@ def evaluate_model(
precision: str,
eval_gauntlet_df: Optional[pd.DataFrame],
icl_subset_num_batches: Optional[int],
callback_configs: Optional[Dict]
):
print(f'Evaluating model: {model_cfg.model_name}', flush=True)
# Build tokenizer and model
Expand All @@ -120,7 +122,12 @@ def evaluate_model(
icl_tasks, eval_gauntlet_config, tokenizer, device_eval_batch_size,
max_seq_len, icl_subset_num_batches)

callbacks = []
# Callbacks
callbacks: List[Callback] = [
build_callback(str(name), callback_cfg)
for name, callback_cfg in callback_configs.items()
] if callback_configs else []

if eval_gauntlet_callback is not None:
callbacks.append(eval_gauntlet_callback)

Expand Down Expand Up @@ -170,6 +177,7 @@ def evaluate_model(
dist_timeout=dist_timeout,
python_log_level=python_log_level,
)
breakpoint()

if torch.cuda.is_available():
torch.cuda.synchronize()
Expand Down Expand Up @@ -245,7 +253,11 @@ def main(cfg: DictConfig):
default_value=None)
# Pop out interpolation variables.
pop_config(cfg, 'model_name_or_path', must_exist=False, default_value=None)

callback_configs: Optional[DictConfig] = pop_config(cfg,
'callbacks',
must_exist=False,
default_value=None)

# Warn for unused parameters
for key in cfg:
warnings.warn(
Expand Down Expand Up @@ -283,7 +295,9 @@ def main(cfg: DictConfig):
python_log_level=python_log_level,
precision=precision,
eval_gauntlet_df=eval_gauntlet_df,
icl_subset_num_batches=icl_subset_num_batches)
icl_subset_num_batches=icl_subset_num_batches,
callback_configs=callback_configs
)

if eval_gauntlet_callback is not None:
composite_scores = eval_gauntlet_callback.eval_after_all(
Expand Down
5 changes: 5 additions & 0 deletions scripts/eval/yamls/hf_eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,8 @@ device_eval_batch_size: 4

icl_tasks: 'eval/yamls/tasks.yaml'
eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'

callbacks:
  eval_output_logging:
    print_only_incorrect: false
    subset_sample: 100
Loading

0 comments on commit dbf5535

Please sign in to comment.