From a4e4055cb6fb406045582ee6c4477fe5af973302 Mon Sep 17 00:00:00 2001
From: Francisco Kurucz
Date: Mon, 15 Jan 2024 05:09:22 -0300
Subject: [PATCH] Fix paths to AI Sweden Models reference and model loading (#28423)

Fix URL to AI Sweden Models reference and model loading
---
 docs/source/en/model_doc/gpt-sw3.md                |  6 ++---
 .../models/gpt_sw3/tokenization_gpt_sw3.py         | 26 +++++++++++--------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/docs/source/en/model_doc/gpt-sw3.md b/docs/source/en/model_doc/gpt-sw3.md
index f4d34a07212cd2..f69bd958e9c5f1 100644
--- a/docs/source/en/model_doc/gpt-sw3.md
+++ b/docs/source/en/model_doc/gpt-sw3.md
@@ -30,15 +30,15 @@ in collaboration with RISE and the WASP WARA for Media and Language. GPT-Sw3 has
 320B tokens in Swedish, Norwegian, Danish, Icelandic, English, and programming code. The model was pretrained using a
 causal language modeling (CLM) objective utilizing the NeMo Megatron GPT implementation.
 
-This model was contributed by [AI Sweden](https://huggingface.co/AI-Sweden).
+This model was contributed by [AI Sweden Models](https://huggingface.co/AI-Sweden-Models).
 
 ## Usage example
 
 ```python
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM
 
->>> tokenizer = AutoTokenizer.from_pretrained("AI-Sweden/gpt-sw3-356m")
->>> model = AutoModelForCausalLM.from_pretrained("AI-Sweden/gpt-sw3-356m")
+>>> tokenizer = AutoTokenizer.from_pretrained("AI-Sweden-Models/gpt-sw3-356m")
+>>> model = AutoModelForCausalLM.from_pretrained("AI-Sweden-Models/gpt-sw3-356m")
 
 >>> input_ids = tokenizer("Träd är fina för att", return_tensors="pt")["input_ids"]
 
diff --git a/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py b/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py
index 7820dfe86b6e09..d740c13d3594a2 100644
--- a/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py
+++ b/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py
@@ -21,20 +21,24 @@
 
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "AI-Sweden/gpt-sw3-126m": "https://huggingface.co/AI-Sweden/gpt-sw3-126m/resolve/main/spiece.model",
-        "AI-Sweden/gpt-sw3-350m": "https://huggingface.co/AI-Sweden/gpt-sw3-350m/resolve/main/spiece.model",
-        "AI-Sweden/gpt-sw3-1.6b": "https://huggingface.co/AI-Sweden/gpt-sw3-1.6b/resolve/main/spiece.model",
-        "AI-Sweden/gpt-sw3-6.7b": "https://huggingface.co/AI-Sweden/gpt-sw3-6.7b/resolve/main/spiece.model",
-        "AI-Sweden/gpt-sw3-20b": "https://huggingface.co/AI-Sweden/gpt-sw3-20b/resolve/main/spiece.model",
+        "AI-Sweden-Models/gpt-sw3-126m": "https://huggingface.co/AI-Sweden-Models/gpt-sw3-126m/resolve/main/spiece.model",
+        "AI-Sweden-Models/gpt-sw3-356m": "https://huggingface.co/AI-Sweden-Models/gpt-sw3-356m/resolve/main/spiece.model",
+        "AI-Sweden-Models/gpt-sw3-1.3b": "https://huggingface.co/AI-Sweden-Models/gpt-sw3-1.3b/resolve/main/spiece.model",
+        "AI-Sweden-Models/gpt-sw3-6.7b": "https://huggingface.co/AI-Sweden-Models/gpt-sw3-6.7b/resolve/main/spiece.model",
+        "AI-Sweden-Models/gpt-sw3-6.7b-v2": "https://huggingface.co/AI-Sweden-Models/gpt-sw3-6.7b-v2/resolve/main/spiece.model",
+        "AI-Sweden-Models/gpt-sw3-20b": "https://huggingface.co/AI-Sweden-Models/gpt-sw3-20b/resolve/main/spiece.model",
+        "AI-Sweden-Models/gpt-sw3-40b": "https://huggingface.co/AI-Sweden-Models/gpt-sw3-20b/resolve/main/spiece.model",
     }
 }
 
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
-    "AI-Sweden/gpt-sw3-126m": 2048,
-    "AI-Sweden/gpt-sw3-350m": 2048,
-    "AI-Sweden/gpt-sw3-1.6b": 2048,
-    "AI-Sweden/gpt-sw3-6.7b": 2048,
-    "AI-Sweden/gpt-sw3-20b": 2048,
"AI-Sweden-Models/gpt-sw3-126m": 2048, + "AI-Sweden-Models/gpt-sw3-356m": 2048, + "AI-Sweden-Models/gpt-sw3-1.3b": 2048, + "AI-Sweden-Models/gpt-sw3-6.7b": 2048, + "AI-Sweden-Models/gpt-sw3-6.7b-v2": 2048, + "AI-Sweden-Models/gpt-sw3-20b": 2048, + "AI-Sweden-Models/gpt-sw3-40b": 2048, } @@ -49,7 +53,7 @@ class GPTSw3Tokenizer(PreTrainedTokenizer): ```python >>> from transformers import GPTSw3Tokenizer - >>> tokenizer = GPTSw3Tokenizer.from_pretrained("AI-Sweden/gpt-sw3-126m") + >>> tokenizer = GPTSw3Tokenizer.from_pretrained("AI-Sweden-Models/gpt-sw3-126m") >>> tokenizer("Svenska är kul!")["input_ids"] [1814, 377, 3617, 63504] ```