diff --git a/convert.py b/convert.py
index 1290c9b3824a05..67e281aa57200e 100755
--- a/convert.py
+++ b/convert.py
@@ -380,6 +380,39 @@ def load(model_plus: ModelPlus) -> "Params":
         return params
 
 
+@dataclass
+class Metadata:
+    name: Optional[str] = None
+    author: Optional[str] = None
+    version: Optional[str] = None
+    url: Optional[str] = None
+    description: Optional[str] = None
+    licence: Optional[str] = None
+    source_url: Optional[str] = None
+    source_hf_repo: Optional[str] = None
+
+    @staticmethod
+    def load(metadata_path: Path) -> "Metadata":
+        if metadata_path is None or not metadata_path.exists():
+            return Metadata()
+
+        with open(metadata_path, 'r') as file:
+            data = json.load(file)
+
+        # Create a new Metadata instance
+        metadata = Metadata()
+
+        # Assign values to Metadata attributes if they exist in the JSON file
+        metadata.name = data.get("general.name")
+        metadata.author = data.get("general.author")
+        metadata.version = data.get("general.version")
+        metadata.url = data.get("general.url")
+        metadata.description = data.get("general.description")
+        metadata.licence = data.get("general.license")
+
+        return metadata
+
+
 class BpeVocab:  # GPT
     def __init__(
         self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]
@@ -1026,16 +1059,37 @@ def __init__(
         fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess
     )
 
-    def add_meta_arch(self, params: Params) -> None:
+    def add_meta_model(self, params: Params, metadata: Metadata) -> None:
+        # Metadata About The Model And Its Provenance
         name = "LLaMA"
-
-        # TODO: better logic to determine model name
-        if params.n_ctx == 4096:
-            name = "LLaMA v2"
+        if metadata is not None and metadata.name is not None:
+            name = metadata.name
         elif params.path_model is not None:
             name = str(params.path_model.parent).split("/")[-1]
+        elif params.n_ctx == 4096:
+            # Heuristic detection of LLaMA v2 model
+            name = "LLaMA v2"
 
         self.gguf.add_name(name)
+
+        if metadata is not None:
+            if metadata.author is not None:
+                self.gguf.add_author(metadata.author)
+            if metadata.version is not None:
+                self.gguf.add_version(metadata.version)
+            if metadata.url is not None:
+                self.gguf.add_url(metadata.url)
+            if metadata.description is not None:
+                self.gguf.add_description(metadata.description)
+            if metadata.licence is not None:
+                self.gguf.add_licence(metadata.licence)
+            if metadata.source_url is not None:
+                self.gguf.add_source_url(metadata.source_url)
+            if metadata.source_hf_repo is not None:
+                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+
+    def add_meta_arch(self, params: Params) -> None:
+        # Metadata About The Neural Architecture Itself
         self.gguf.add_context_length(params.n_ctx)
         self.gguf.add_embedding_length(params.n_embd)
         self.gguf.add_block_count(params.n_layer)
@@ -1146,12 +1200,14 @@ def write_vocab_only(
     svocab: gguf.SpecialVocab,
     endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
     pad_vocab: bool = False,
+    metadata: Metadata = None,
 ) -> None:
     check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
     of = OutputFile(fname_out, endianess=endianess)
 
     # meta data
+    of.add_meta_model(params, metadata)
     of.add_meta_arch(params)
     of.add_meta_vocab(vocab)
     of.add_meta_special_vocab(svocab)
@@ -1184,12 +1240,14 @@ def write_all(
     concurrency: int = DEFAULT_CONCURRENCY,
     endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
     pad_vocab: bool = False,
+    metadata: Metadata = None,
 ) -> None:
     check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
     of = OutputFile(fname_out, endianess=endianess)
 
     # meta data
+    of.add_meta_model(params, metadata)
     of.add_meta_arch(params)
     of.add_meta_vocab(vocab)
     of.add_meta_special_vocab(svocab)
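Note: Metadata.load above expects a flat JSON object keyed by the same general.* names used in the GGUF spec. A minimal metadata file for the --metadata option added below might look like this (all values illustrative):

    {
        "general.name": "MyModel",
        "general.author": "Example Author",
        "general.version": "v1.0",
        "general.url": "https://example.com/mymodel",
        "general.description": "An example model.",
        "general.license": "Apache-2.0"
    }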
@@ -1463,7 +1521,7 @@ def load_vocab(
     return vocab, special_vocab
 
 
-def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int) -> Path:
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
     quantization = {
         GGMLFileType.AllF32: "f32",
         GGMLFileType.MostlyF16: "f16",
@@ -1472,11 +1530,17 @@ def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Pa
 
     parameters = model_parameter_count_rounded_notation(model_params_count)
 
+    version = ""
+    if metadata is not None and metadata.version is not None:
+        version = f"-{metadata.version}"
+
     name = "ggml-model"
-    if params.path_model is not None:
+    if metadata is not None and metadata.name is not None:
+        name = metadata.name
+    elif params.path_model is not None:
         name = params.path_model.name
 
-    ret = model_paths[0].parent / f"{name}-{parameters}-{quantization}.gguf"
+    ret = model_paths[0].parent / f"{name}{version}-{parameters}-{quantization}.gguf"
     if ret in model_paths:
         sys.stderr.write(
             f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1585,6 +1649,13 @@ def get_argument_parser() -> ArgumentParser:
         help="Indicate that the model is executed on a big-endian machine",
     )
 
+    # https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#general-metadata
+    parser.add_argument(
+        "--metadata",
+        type=Path,
+        help="Specify the path for a metadata file",
+    )
+
     return parser
 
 
@@ -1592,6 +1663,8 @@ def main(argv: Optional[list[str]] = None) -> None:
     parser = get_argument_parser()
    args = parser.parse_args(argv)
 
+    metadata = Metadata.load(args.metadata)
+
     if args.awq_path:
         sys.path.insert(1, str(Path(__file__).resolve().parent / "awq-py"))
         from awq.apply_awq import add_scale_weights
@@ -1665,6 +1738,7 @@ def main(argv: Optional[list[str]] = None) -> None:
             special_vocab,
             endianess=endianess,
             pad_vocab=args.pad_vocab,
+            metadata=metadata,
         )
         print(f"Wrote {outfile}")
         return
@@ -1683,7 +1757,7 @@ def main(argv: Optional[list[str]] = None) -> None:
     model = convert_model_names(model, params)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
     params.ftype = ftype
 
     print(f"Writing {outfile}, format {ftype}")
@@ -1698,6 +1772,7 @@ def main(argv: Optional[list[str]] = None) -> None:
         concurrency=args.concurrency,
         endianess=endianess,
         pad_vocab=args.pad_vocab,
+        metadata=metadata,
     )
 
     print(f"Wrote {outfile}")
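With the convert.py changes above in place, a run such as the following (paths illustrative) picks up the overrides:

    python convert.py models/mymodel/ --metadata mymodel.metadata.json

default_outfile then builds names from the f"{name}{version}-{parameters}-{quantization}.gguf" pattern introduced above, e.g. MyModel-v1.0-7B-f16.gguf for the example metadata file shown earlier, given a 7B-parameter model converted to f16.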
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index f0a1c51f8dbe86..a1132287ab483e 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -24,6 +24,7 @@ class General:
         ALIGNMENT            = "general.alignment"
         NAME                 = "general.name"
         AUTHOR               = "general.author"
+        VERSION              = "general.version"
         URL                  = "general.url"
         DESCRIPTION          = "general.description"
         LICENSE              = "general.license"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index d93aaa877171fa..a5fc8747ff3481 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -283,6 +283,9 @@ def add_architecture(self) -> None:
     def add_author(self, author: str) -> None:
         self.add_string(Keys.General.AUTHOR, author)
 
+    def add_version(self, version: str) -> None:
+        self.add_string(Keys.General.VERSION, version)
+
     def add_tensor_data_layout(self, layout: str) -> None:
         self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
@@ -292,6 +295,9 @@ def add_url(self, url: str) -> None:
     def add_description(self, description: str) -> None:
         self.add_string(Keys.General.DESCRIPTION, description)
 
+    def add_licence(self, licence: str) -> None:
+        self.add_string(Keys.General.LICENSE, licence)
+
     def add_source_url(self, url: str) -> None:
         self.add_string(Keys.General.SOURCE_URL, url)
 
diff --git a/requirements/requirements-convert.txt b/requirements/requirements-convert.txt
index a3d6ecec0ac043..727bcd23a19ac7 100644
--- a/requirements/requirements-convert.txt
+++ b/requirements/requirements-convert.txt
@@ -3,3 +3,4 @@ sentencepiece~=0.1.98
 transformers>=4.35.2,<5.0.0
 gguf>=0.1.0
 protobuf>=4.21.0,<5.0.0
+python-frontmatter>=1.0.1
\ No newline at end of file
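The new gguf-py writer methods can also be driven directly. A minimal sketch (output path, arch, and values are illustrative; assumes the updated gguf package from this change is installed):

    from gguf import GGUFWriter

    writer = GGUFWriter("example.gguf", "llama")  # illustrative path and arch
    writer.add_name("MyModel")
    writer.add_version("v1.0")        # stored under general.version
    writer.add_licence("Apache-2.0")  # stored under general.license

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()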