diff --git a/mediapipe/tasks/cc/genai/inference/proto/llm_params.proto b/mediapipe/tasks/cc/genai/inference/proto/llm_params.proto index ea98937486..e613198f50 100644 --- a/mediapipe/tasks/cc/genai/inference/proto/llm_params.proto +++ b/mediapipe/tasks/cc/genai/inference/proto/llm_params.proto @@ -94,4 +94,8 @@ message LlmParameters { optional string system_role_token = 12; optional string model_role_token = 13; optional string end_role_token = 14; + + // If this model includes a submodel, these params can be used to load the + // submodel. + optional TransformerParameters submodel_transformer_parameters = 16; } diff --git a/mediapipe/tasks/cc/genai/inference/proto/transformer_params.proto b/mediapipe/tasks/cc/genai/inference/proto/transformer_params.proto index 7b9bdf4d84..c87f758e82 100644 --- a/mediapipe/tasks/cc/genai/inference/proto/transformer_params.proto +++ b/mediapipe/tasks/cc/genai/inference/proto/transformer_params.proto @@ -194,4 +194,8 @@ message TransformerParameters { // Vision parameters int32 vision_tokens_num = 26; + + // The number of stacks that are treated as "extra", which may have slightly + // different loading behavior. + int32 num_extra_stacks = 27; } diff --git a/mediapipe/tasks/python/genai/converter/llm_converter.py b/mediapipe/tasks/python/genai/converter/llm_converter.py index e92f241500..81ec5bfa36 100644 --- a/mediapipe/tasks/python/genai/converter/llm_converter.py +++ b/mediapipe/tasks/python/genai/converter/llm_converter.py @@ -50,6 +50,7 @@ class ConversionConfig(object): zero. image_encoder_file: A string with the name of the image encoder tflite file. image_adapter_file: A string with the name of the image adapter tflite file. + submodel_type: Name of submodel, e.g. GEMMA_2B. use_fake_weights: Whether to use fake weights. If set to True, the weights will be filled with zeros. """ @@ -75,6 +76,7 @@ def __init__( lora_output_tflite_file: Optional[str] = None, image_encoder_file: Optional[str] = None, image_adapter_file: Optional[str] = None, + submodel_type: Optional[str] = None, use_fake_weights: bool = False, ): self.input_ckpt = input_ckpt @@ -96,6 +98,7 @@ def __init__( self.obfuscate = obfuscate self.image_encoder_file = image_encoder_file self.image_adapter_file = image_adapter_file + self.submodel_type = submodel_type self.use_fake_weights = use_fake_weights if output_tflite_file: parent_dir = os.path.dirname(output_tflite_file) @@ -220,6 +223,7 @@ def combined_weight_bins_to_tflite( lora_output_tflite_file: Optional[str] = None, image_encoder_file: Optional[str] = None, image_adapter_file: Optional[str] = None, + submodel_type: Optional[str] = None, ): """Combines weight files to tflite file.""" if backend == 'cpu': @@ -245,6 +249,7 @@ def combined_weight_bins_to_tflite( '' if lora_output_tflite_file is None else lora_output_tflite_file, '' if image_encoder_file is None else image_encoder_file, '' if image_adapter_file is None else image_adapter_file, + '' if submodel_type is None else submodel_type, ) else: raise ValueError('Unsupported backend: %s' % backend) @@ -365,4 +370,5 @@ def convert_checkpoint(config: ConversionConfig) -> None: lora_output_tflite_file=config.lora_output_tflite_file, image_encoder_file=config.image_encoder_file, image_adapter_file=config.image_adapter_file, + submodel_type=config.submodel_type, )