mosaicml · dakinggg · Apr 23, 2024 · Apr 19, 2024 · Apr 19, 2024 · Apr 19, 2024
@@ -14,6 +14,7 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
+import numpy as np
 import torch
 import torch.nn as nn
 from composer.core import Callback, Event, State, Time, TimeUnit
@@ -160,8 +161,6 @@ def __init__(
         if mlflow_logging_config is None:
             mlflow_logging_config = {}
         if self.mlflow_registered_model_name is not None:
-            import numpy as np
-
             # Both the metadata and the task are needed in order for mlflow
             # and databricks optimized model serving to work
             passed_metadata = mlflow_logging_config.get('metadata', {})
@@ -171,18 +170,17 @@ def __init__(
             default_input_example = {
                 'prompt': np.array(['What is Machine Learning?'])
             }
-            is_chat = mlflow_logging_config['task'].endswith(
-                'chat') or mlflow_logging_config['metadata'].get(
-                    'task', '').endswith('chat')
+            is_chat = mlflow_logging_config['task'].endswith('chat') or (
+                mlflow_logging_config['metadata'] is not None and
+                mlflow_logging_config['metadata'].get('task',
+                                                      '').endswith('chat'))
             if is_chat:
                 default_input_example = {
-                    'messages':
-                        np.array([{
-                            'role': 'user',
-                            'content': 'What is Machine Learning?'
-                        }])
+                    'messages': [{
+                        'role': 'user',
+                        'content': 'What is Machine Learning?'
+                    }]
                 }
-                mlflow_logging_config.setdefault('example_no_conversion', True)
             mlflow_logging_config.setdefault('input_example',
                                              default_input_example)
 
@@ -260,6 +258,13 @@ def _is_last_batch(self, state: State):
             return True
 
         assert state.max_duration is not None  # for pyright
+
+        epoch_complete = state.dataloader_len == state.timestamp.batch_in_epoch
+        second_to_last_epoch = state.max_duration.unit == TimeUnit.EPOCH and (
+            state.timestamp.epoch == state.max_duration.value - 1)
+        if self.save_interval.unit == TimeUnit.BATCH and second_to_last_epoch and epoch_complete:
+            return True
+
         # If the save interval is specified as 1dur, and the max duration is in epoch units
         # we need a special case to identify we are on the last batch and should write the mlflow checkpoint
         if self.save_interval.unit == TimeUnit.DURATION and self.save_interval.value == 1 and state.max_duration.unit == TimeUnit.EPOCH:

@@ -54,7 +54,8 @@
 
 install_requires = [
     'mosaicml[libcloud,wandb,oci,gcs]>=0.21.3,<0.22',
-    'mlflow>=2.10,<2.12',
+    'mlflow>=2.12.1,<2.13',
+    'pandas',
     'accelerate>=0.25,<0.26',  # for HF inference `device_map`
     'transformers>=4.40,<4.41',
     'mosaicml-streaming>=0.7.5,<0.8',