From f83d85cbde933ddd78a55a02b053cbef3044d1c3 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 11 Sep 2023 18:50:28 +0200 Subject: [PATCH 01/14] Add link to optimum docs for supported architectures Closes #288 --- README.md | 2 ++ docs/snippets/4_custom-usage.snippet | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 815ed4c5e..6e34328ef 100644 --- a/README.md +++ b/README.md @@ -170,6 +170,8 @@ bert-base-uncased/ └── model_quantized.onnx ``` +For the full list of supported architectures, see the [Optimum documentation](https://huggingface.co/docs/optimum/main/en/exporters/onnx/overview). + ## Supported tasks/models diff --git a/docs/snippets/4_custom-usage.snippet b/docs/snippets/4_custom-usage.snippet index a656a7b1c..db93f975e 100644 --- a/docs/snippets/4_custom-usage.snippet +++ b/docs/snippets/4_custom-usage.snippet @@ -44,3 +44,5 @@ bert-base-uncased/ ├── model.onnx └── model_quantized.onnx ``` + +For the full list of supported architectures, see the [Optimum documentation](https://huggingface.co/docs/optimum/main/en/exporters/onnx/overview). From 54b3de042a196540ec0ecb60ad89d69227539425 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 12 Sep 2023 20:45:17 +0200 Subject: [PATCH 02/14] Refactor `SUPPORTED_MODELS` dict to include task --- scripts/supported_models.py | 912 +++++++++++++++++++++--------------- tests/generate_tests.py | 13 +- 2 files changed, 537 insertions(+), 388 deletions(-) diff --git a/scripts/supported_models.py b/scripts/supported_models.py index 0b4312d34..928728eaa 100644 --- a/scripts/supported_models.py +++ b/scripts/supported_models.py @@ -3,100 +3,118 @@ SUPPORTED_MODELS = { # NOTE: keys of `SUPPORTED_MODELS` are subsets of https://github.com/huggingface/optimum/blob/7f8e606689365931300ef5e6d3b20cb88771cb08/optimum/exporters/tasks.py#L281-L965 - 'albert': [ + 'albert': { # Masked language modelling - 'albert-base-v2', - 'albert-large-v2', + 'fill-mask': [ + 'albert-base-v2', + 'albert-large-v2', + ], # Feature extraction - 'sentence-transformers/paraphrase-albert-small-v2', - 'sentence-transformers/paraphrase-albert-base-v2', - ], - 'bart': [ + 'feature-extraction': [ + 'sentence-transformers/paraphrase-albert-small-v2', + 'sentence-transformers/paraphrase-albert-base-v2', + ], + }, + 'bart': { # Summarization - 'sshleifer/distilbart-xsum-12-1', - 'sshleifer/distilbart-xsum-6-6', - 'sshleifer/distilbart-xsum-12-3', - 'sshleifer/distilbart-xsum-9-6', - 'sshleifer/distilbart-xsum-12-6', - 'sshleifer/distilbart-cnn-12-3', - 'sshleifer/distilbart-cnn-12-6', - 'sshleifer/distilbart-cnn-6-6', - 'facebook/bart-large-cnn', - 'facebook/bart-large-xsum', - + 'summarization': [ + 'sshleifer/distilbart-xsum-12-1', + 'sshleifer/distilbart-xsum-6-6', + 'sshleifer/distilbart-xsum-12-3', + 'sshleifer/distilbart-xsum-9-6', + 'sshleifer/distilbart-xsum-12-6', + 'sshleifer/distilbart-cnn-12-3', + 'sshleifer/distilbart-cnn-12-6', + 'sshleifer/distilbart-cnn-6-6', + 'facebook/bart-large-cnn', + 'facebook/bart-large-xsum', + ], # Zero-shot classification - 'facebook/bart-large-mnli', - ], - 'beit': [ + 'zero-shot-classification': { + 'facebook/bart-large-mnli', + }, + }, + 'beit': { # Image classification - 'microsoft/beit-base-patch16-224', - 'microsoft/beit-base-patch16-224-pt22k', - 'microsoft/beit-base-patch16-384', - 'microsoft/beit-base-patch16-224-pt22k-ft22k', - 'microsoft/beit-large-patch16-224', - 'microsoft/beit-large-patch16-224-pt22k', - 'microsoft/beit-large-patch16-512', - 'microsoft/beit-large-patch16-224-pt22k-ft22k', 
- 'microsoft/beit-large-patch16-384', - 'microsoft/dit-base-finetuned-rvlcdip', - 'microsoft/dit-large-finetuned-rvlcdip', - ], - 'bert': [ + 'image-classification': [ + 'microsoft/beit-base-patch16-224', + 'microsoft/beit-base-patch16-224-pt22k', + 'microsoft/beit-base-patch16-384', + 'microsoft/beit-base-patch16-224-pt22k-ft22k', + 'microsoft/beit-large-patch16-224', + 'microsoft/beit-large-patch16-224-pt22k', + 'microsoft/beit-large-patch16-512', + 'microsoft/beit-large-patch16-224-pt22k-ft22k', + 'microsoft/beit-large-patch16-384', + 'microsoft/dit-base-finetuned-rvlcdip', + 'microsoft/dit-large-finetuned-rvlcdip', + ], + }, + 'bert': { # Feature extraction - 'sentence-transformers/all-MiniLM-L6-v2', - 'sentence-transformers/all-MiniLM-L12-v2', - 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', - 'sentence-transformers/paraphrase-MiniLM-L6-v2', - 'sentence-transformers/paraphrase-MiniLM-L3-v2', - 'sentence-transformers/bert-base-nli-mean-tokens', - 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1', - 'sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens', - 'sentence-transformers/LaBSE', - 'deepset/sentence_bert', - 'intfloat/e5-small', - 'intfloat/e5-small-v2', - 'intfloat/e5-base', - 'intfloat/e5-base-v2', - 'intfloat/e5-large', - 'intfloat/e5-large-v2', - 'intfloat/multilingual-e5-base', - 'thenlper/gte-small', - 'thenlper/gte-base', - 'thenlper/gte-large', - 'BAAI/bge-small-en', - 'BAAI/bge-base-en', - 'BAAI/bge-large-en', - 'allenai/scibert_scivocab_uncased', - 'SpanBERT/spanbert-large-cased', - 'SpanBERT/spanbert-base-cased', - 'cambridgeltl/SapBERT-from-PubMedBERT-fulltext', - 'indobenchmark/indobert-base-p1', - 'GanjinZero/UMLSBert_ENG', - 'DeepPavlov/rubert-base-cased', - 'monologg/kobert', + 'feature-extraction': [ + 'sentence-transformers/all-MiniLM-L6-v2', + 'sentence-transformers/all-MiniLM-L12-v2', + 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', + 'sentence-transformers/paraphrase-MiniLM-L6-v2', + 'sentence-transformers/paraphrase-MiniLM-L3-v2', + 'sentence-transformers/bert-base-nli-mean-tokens', + 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1', + 'sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens', + 'sentence-transformers/LaBSE', + 'deepset/sentence_bert', + 'intfloat/e5-small', + 'intfloat/e5-small-v2', + 'intfloat/e5-base', + 'intfloat/e5-base-v2', + 'intfloat/e5-large', + 'intfloat/e5-large-v2', + 'intfloat/multilingual-e5-base', + 'thenlper/gte-small', + 'thenlper/gte-base', + 'thenlper/gte-large', + 'BAAI/bge-small-en', + 'BAAI/bge-base-en', + 'BAAI/bge-large-en', + 'allenai/scibert_scivocab_uncased', + 'SpanBERT/spanbert-large-cased', + 'SpanBERT/spanbert-base-cased', + 'cambridgeltl/SapBERT-from-PubMedBERT-fulltext', + 'indobenchmark/indobert-base-p1', + 'GanjinZero/UMLSBert_ENG', + 'DeepPavlov/rubert-base-cased', + 'monologg/kobert', + ], # Text classification - 'nlptown/bert-base-multilingual-uncased-sentiment', - 'ProsusAI/finbert', - 'unitary/toxic-bert', + 'text-classification': [ + 'nlptown/bert-base-multilingual-uncased-sentiment', + 'ProsusAI/finbert', + 'unitary/toxic-bert', + ], + # Token classification - 'Davlan/bert-base-multilingual-cased-ner-hrl', - 'ckiplab/bert-base-chinese-ner', - 'ckiplab/bert-base-chinese-ws', - 'ckiplab/bert-base-chinese-pos', - 'dslim/bert-base-NER', - 'dslim/bert-base-NER-uncased', + 'token-classification': [ + 'Davlan/bert-base-multilingual-cased-ner-hrl', + 'ckiplab/bert-base-chinese-ner', + 'ckiplab/bert-base-chinese-ws', + 
'ckiplab/bert-base-chinese-pos', + 'dslim/bert-base-NER', + 'dslim/bert-base-NER-uncased', + ], # Masked language modelling - 'bert-base-uncased', - 'bert-base-cased', - 'bert-base-multilingual-uncased', - 'bert-base-multilingual-cased', - 'bert-base-chinese', - 'emilyalsentzer/Bio_ClinicalBERT', - ], + 'fill-mask': [ + 'bert-base-uncased', + 'bert-base-cased', + 'bert-base-multilingual-uncased', + 'bert-base-multilingual-cased', + 'bert-base-chinese', + 'emilyalsentzer/Bio_ClinicalBERT', + ] + }, # 'blenderbot': [ # # Text2text generation (TODO add conversational) # 'facebook/blenderbot-400M-distill', @@ -107,385 +125,509 @@ # 'facebook/blenderbot-90M', # DEPRECATED # 'facebook/blenderbot_small-90M', # ], - 'bloom': [ + 'bloom': { # Text generation - 'bigscience/bloom-560m', - 'bigscience/bloomz-560m', - ], - 'camembert': [ + 'text-generation': [ + 'bigscience/bloom-560m', + 'bigscience/bloomz-560m', + ], + }, + + + + 'camembert': { # Feature extraction - 'dangvantuan/sentence-camembert-large', + 'feature-extraction': [ + 'dangvantuan/sentence-camembert-large', + ], # Token classification - 'Jean-Baptiste/camembert-ner', - 'Jean-Baptiste/camembert-ner-with-dates', - 'pythainlp/thainer-corpus-v2-base-model', - 'gilf/french-camembert-postag-model', + 'token-classification': [ + 'Jean-Baptiste/camembert-ner', + 'Jean-Baptiste/camembert-ner-with-dates', + 'pythainlp/thainer-corpus-v2-base-model', + 'gilf/french-camembert-postag-model', + ], # Masked language modelling - 'camembert-base', - 'airesearch/wangchanberta-base-att-spm-uncased', - ], - 'clip': [ - # Zero-shot image classification and feature extraction + 'fill-mask': [ + 'camembert-base', + 'airesearch/wangchanberta-base-att-spm-uncased', + ], + }, + + 'clip': { + # Zero-shot image classification (and feature extraction) # (with and without `--split_modalities`) - 'openai/clip-vit-base-patch16', - 'openai/clip-vit-base-patch32', - 'openai/clip-vit-large-patch14', - 'openai/clip-vit-large-patch14-336', - ], - 'codegen': [ + 'zero-shot-image-classification': [ + 'openai/clip-vit-base-patch16', + 'openai/clip-vit-base-patch32', + 'openai/clip-vit-large-patch14', + 'openai/clip-vit-large-patch14-336', + ] + }, + 'codegen': { # Text generation - 'Salesforce/codegen-350M-mono', - 'Salesforce/codegen-350M-multi', - 'Salesforce/codegen-350M-nl', - ], - 'deberta': [ + 'text-generation': [ + 'Salesforce/codegen-350M-mono', + 'Salesforce/codegen-350M-multi', + 'Salesforce/codegen-350M-nl', + ], + }, + 'deberta': { # Zero-shot classification - 'cross-encoder/nli-deberta-base', - 'Narsil/deberta-large-mnli-zero-cls', - ], - 'deberta-v2': [ + 'zero-shot-classification': [ + 'cross-encoder/nli-deberta-base', + 'Narsil/deberta-large-mnli-zero-cls', + ], + }, + 'deberta-v2': { # Zero-shot classification - 'cross-encoder/nli-deberta-v3-xsmall', - 'cross-encoder/nli-deberta-v3-small', - 'cross-encoder/nli-deberta-v3-base', - 'cross-encoder/nli-deberta-v3-large', - 'MoritzLaurer/DeBERTa-v3-xsmall-mnli-fever-anli-ling-binary', - 'MoritzLaurer/DeBERTa-v3-base-mnli', - 'MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli', - 'MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli', - 'MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7', - 'sileod/deberta-v3-base-tasksource-nli', - 'sileod/deberta-v3-large-tasksource-nli', - ], - 'deit': [ + 'zero-shot-classification': [ + 'cross-encoder/nli-deberta-v3-xsmall', + 'cross-encoder/nli-deberta-v3-small', + 'cross-encoder/nli-deberta-v3-base', + 'cross-encoder/nli-deberta-v3-large', + 
'MoritzLaurer/DeBERTa-v3-xsmall-mnli-fever-anli-ling-binary', + 'MoritzLaurer/DeBERTa-v3-base-mnli', + 'MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli', + 'MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli', + 'MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7', + 'sileod/deberta-v3-base-tasksource-nli', + 'sileod/deberta-v3-large-tasksource-nli', + ], + }, + 'deit': { # Image classification - 'facebook/deit-tiny-distilled-patch16-224', - 'facebook/deit-small-distilled-patch16-224', - 'facebook/deit-base-distilled-patch16-224', - 'facebook/deit-base-distilled-patch16-384', - ], - 'detr': [ + 'image-classification': [ + 'facebook/deit-tiny-distilled-patch16-224', + 'facebook/deit-small-distilled-patch16-224', + 'facebook/deit-base-distilled-patch16-224', + 'facebook/deit-base-distilled-patch16-384', + ], + }, + 'detr': { # Object detection - 'facebook/detr-resnet-50', - 'facebook/detr-resnet-101', + 'object-detection': [ + 'facebook/detr-resnet-50', + 'facebook/detr-resnet-101', + ], # Image segmentation - 'facebook/detr-resnet-50-panoptic', - ], - 'distilbert': [ + 'image-segmentation': [ + 'facebook/detr-resnet-50-panoptic', + ], + }, + 'distilbert': { # Feature extraction - 'sentence-transformers/multi-qa-distilbert-cos-v1', - 'sentence-transformers/distiluse-base-multilingual-cased-v1', - 'sentence-transformers/distiluse-base-multilingual-cased-v2', - 'sentence-transformers/distilbert-base-nli-mean-tokens', - 'sentence-transformers/distilbert-base-nli-stsb-mean-tokens', - 'sentence-transformers/msmarco-distilbert-base-v4', + 'feature-extraction': [ + 'sentence-transformers/multi-qa-distilbert-cos-v1', + 'sentence-transformers/distiluse-base-multilingual-cased-v1', + 'sentence-transformers/distiluse-base-multilingual-cased-v2', + 'sentence-transformers/distilbert-base-nli-mean-tokens', + 'sentence-transformers/distilbert-base-nli-stsb-mean-tokens', + 'sentence-transformers/msmarco-distilbert-base-v4', + ], # Text classification - 'distilbert-base-uncased-finetuned-sst-2-english', + 'text-classification': [ + 'distilbert-base-uncased-finetuned-sst-2-english', + ], # Question answering - 'distilbert-base-uncased-distilled-squad', - 'distilbert-base-cased-distilled-squad', + 'question-answering': [ + 'distilbert-base-uncased-distilled-squad', + 'distilbert-base-cased-distilled-squad', + ], # Zero-shot classification - 'typeform/distilbert-base-uncased-mnli', + 'zero-shot-classification': [ + 'typeform/distilbert-base-uncased-mnli', + ], # Token classification - 'Davlan/distilbert-base-multilingual-cased-ner-hrl', + 'token-classification': [ + 'Davlan/distilbert-base-multilingual-cased-ner-hrl', + ], # Masked language modelling - 'distilbert-base-uncased', - 'distilbert-base-cased', - ], - 'donut': [ + 'fill-mask': [ + 'distilbert-base-uncased', + 'distilbert-base-cased', + ], + }, + 'donut': { # Image-to-text - 'naver-clova-ix/donut-base-finetuned-cord-v2', + 'image-to-text': [ + 'naver-clova-ix/donut-base-finetuned-cord-v2', + ], # Document Question Answering - 'naver-clova-ix/donut-base-finetuned-docvqa', - ], - 'gpt_neo': [ + 'document-question-answering': [ + 'naver-clova-ix/donut-base-finetuned-docvqa', + ], + }, + 'gpt_neo': { # Text generation - 'EleutherAI/gpt-neo-125M', - 'MBZUAI/LaMini-Neo-125M', - # 'MBZUAI/LaMini-Neo-1.3B', # TODO add - 'iliemihai/gpt-neo-romanian-125m', - ], - 'gpt_neox': [ + 'text-generation': [ + 'EleutherAI/gpt-neo-125M', + 'MBZUAI/LaMini-Neo-125M', + # 'MBZUAI/LaMini-Neo-1.3B', # TODO add + 'iliemihai/gpt-neo-romanian-125m', + ], + }, + 
'gpt_neox': { # Text generation - 'EleutherAI/pythia-14m', - 'EleutherAI/pythia-31m', - 'EleutherAI/pythia-70m', - 'EleutherAI/pythia-70m-deduped', - 'EleutherAI/pythia-160m', - 'EleutherAI/pythia-160m-deduped', - 'EleutherAI/pythia-410m', - 'EleutherAI/pythia-410m-deduped', - ], - 'gpt2': [ + 'text-generation': [ + 'EleutherAI/pythia-14m', + 'EleutherAI/pythia-31m', + 'EleutherAI/pythia-70m', + 'EleutherAI/pythia-70m-deduped', + 'EleutherAI/pythia-160m', + 'EleutherAI/pythia-160m-deduped', + 'EleutherAI/pythia-410m', + 'EleutherAI/pythia-410m-deduped', + ], + }, + 'gpt2': { # Text generation - 'gpt2', - 'distilgpt2', - 'MBZUAI/LaMini-Cerebras-111M', - 'MBZUAI/LaMini-Cerebras-256M', - 'MBZUAI/LaMini-Cerebras-590M', - # 'MBZUAI/LaMini-Cerebras-1.3B', # TODO add - 'MBZUAI/LaMini-GPT-124M', - 'MBZUAI/LaMini-GPT-774M', - # 'MBZUAI/LaMini-GPT-1.5B', # TODO add - 'aisquared/dlite-v2-774m', - 'Locutusque/gpt2-large-conversational', - ], - 'gpt_bigcode': [ + 'text-generation': [ + 'gpt2', + 'distilgpt2', + 'MBZUAI/LaMini-Cerebras-111M', + 'MBZUAI/LaMini-Cerebras-256M', + 'MBZUAI/LaMini-Cerebras-590M', + # 'MBZUAI/LaMini-Cerebras-1.3B', # TODO add + 'MBZUAI/LaMini-GPT-124M', + 'MBZUAI/LaMini-GPT-774M', + # 'MBZUAI/LaMini-GPT-1.5B', # TODO add + 'aisquared/dlite-v2-774m', + 'Locutusque/gpt2-large-conversational', + ], + }, + 'gpt_bigcode': { # Text generation - 'bigcode/tiny_starcoder_py', - 'abacaj/starcoderbase-1b-sft', - # 'bigcode/starcoderbase-1b', # NOTE: This model is gated, so we ignore it when testing - ], - 'gptj': [ + 'text-generation': [ + 'bigcode/tiny_starcoder_py', + 'abacaj/starcoderbase-1b-sft', + # 'bigcode/starcoderbase-1b', # NOTE: This model is gated, so we ignore it when testing + ], + }, + 'gptj': { # Text generation - 'TabbyML/J-350M', - 'Milos/slovak-gpt-j-405M', - 'heegyu/kogpt-j-350m', - ], - 'herbert': [ + 'text-generation': [ + 'TabbyML/J-350M', + 'Milos/slovak-gpt-j-405M', + 'heegyu/kogpt-j-350m', + ], + }, + 'herbert': { # Feature extraction - 'allegro/herbert-base-cased', - 'allegro/herbert-large-cased', - ], - 'llama': [ + 'feature-extraction': [ + 'allegro/herbert-base-cased', + 'allegro/herbert-large-cased', + ], + }, + 'llama': { # Text generation - 'Xenova/llama2.c-stories15M', - 'Xenova/llama2.c-stories42M', - 'Xenova/llama2.c-stories110M', - 'RajuKandasamy/tamillama_tiny_30m', - 'JackFram/llama-68m', - 'JackFram/llama-160m', - ], - 'm2m_100': [ + 'text-generation': [ + # Text generation + 'Xenova/llama2.c-stories15M', + 'Xenova/llama2.c-stories42M', + 'Xenova/llama2.c-stories110M', + 'RajuKandasamy/tamillama_tiny_30m', + 'JackFram/llama-68m', + 'JackFram/llama-160m', + ] + }, + 'm2m_100': { # Translation - 'facebook/nllb-200-distilled-600M', - 'facebook/m2m100_418M', - ], - 'marian': [ + 'translation': [ + 'facebook/nllb-200-distilled-600M', + 'facebook/m2m100_418M', + ], + }, + 'marian': { # Translation - f'Helsinki-NLP/opus-mt-{x}' - for x in SUPPORTED_HELSINKI_NLP_MODELS - ], - 'mbart': [ - # Translation - 'facebook/mbart-large-50-many-to-many-mmt', - 'facebook/mbart-large-50-many-to-one-mmt', - 'facebook/mbart-large-50', - ], - 'mobilebert': [ + 'translation': [ + f'Helsinki-NLP/opus-mt-{x}' + for x in SUPPORTED_HELSINKI_NLP_MODELS + ], + }, + 'mbart': { + 'translation': [ + 'facebook/mbart-large-50-many-to-many-mmt', + 'facebook/mbart-large-50-many-to-one-mmt', + 'facebook/mbart-large-50', + ], + }, + 'mobilebert': { # Zero-shot classification - 'typeform/mobilebert-uncased-mnli', + 'zero-shot-classification': [ + 'typeform/mobilebert-uncased-mnli', - # 
TODO: - # https://github.com/huggingface/optimum/issues/1027 - # 'google/mobilebert-uncased', - ], - 'mobilevit': [ + # TODO: + # https://github.com/huggingface/optimum/issues/1027 + # 'google/mobilebert-uncased', + ], + }, + 'mobilevit': { # Image classification - 'apple/mobilevit-small', - 'apple/mobilevit-x-small', - 'apple/mobilevit-xx-small', + 'image-classification': [ + 'apple/mobilevit-small', + 'apple/mobilevit-x-small', + 'apple/mobilevit-xx-small', + ], # TODO: Image segmentation - # 'apple/deeplabv3-mobilevit-small', - # 'apple/deeplabv3-mobilevit-x-small', - # 'apple/deeplabv3-mobilevit-xx-small', - ], - 'mpt': [ + # 'image-segmentation': [ + # 'apple/deeplabv3-mobilevit-small', + # 'apple/deeplabv3-mobilevit-x-small', + # 'apple/deeplabv3-mobilevit-xx-small', + # ], + }, + 'mpt': { # Text generation - 'efederici/ipt-350m', - ], - 'mpnet': [ + 'text-generation': [ + 'efederici/ipt-350m', + ] + }, + 'mpnet': { # Feature extraction - 'sentence-transformers/all-mpnet-base-v2', - 'sentence-transformers/nli-mpnet-base-v2', - 'sentence-transformers/paraphrase-mpnet-base-v2', - 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2', - 'sentence-transformers/multi-qa-mpnet-base-cos-v1', - 'sentence-transformers/multi-qa-mpnet-base-dot-v1', - ], - 'mt5': [ - 'google/mt5-small', - 'google/mt5-base', - ], - 'opt': [ + 'feature-extraction': [ + 'sentence-transformers/all-mpnet-base-v2', + 'sentence-transformers/nli-mpnet-base-v2', + 'sentence-transformers/paraphrase-mpnet-base-v2', + 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2', + 'sentence-transformers/multi-qa-mpnet-base-cos-v1', + 'sentence-transformers/multi-qa-mpnet-base-dot-v1', + ], + }, + 'mt5': { + # Text-to-text + 'text2text-generation': [ + 'google/mt5-small', + 'google/mt5-base', + ], + }, + 'opt': { # Text generation - 'facebook/opt-125m', - 'facebook/opt-350m', - # (TODO conversational) - 'PygmalionAI/pygmalion-350m', - ], - 'resnet': [ + 'text-generation': [ + # Text generation + 'facebook/opt-125m', + 'facebook/opt-350m', + # (TODO conversational) + 'PygmalionAI/pygmalion-350m', + ] + }, + 'resnet': { # Image classification - 'microsoft/resnet-18', - 'microsoft/resnet-26', - 'microsoft/resnet-34', - 'microsoft/resnet-50', - 'microsoft/resnet-101', - 'microsoft/resnet-152', - ], - 'roberta': [ - # Masked language modelling - 'roberta-base', - 'distilroberta-base', - + 'image-classification': [ + 'microsoft/resnet-18', + 'microsoft/resnet-26', + 'microsoft/resnet-34', + 'microsoft/resnet-50', + 'microsoft/resnet-101', + 'microsoft/resnet-152', + ], + }, + 'roberta': { # Feature extraction - 'sentence-transformers/all-distilroberta-v1', - 'sentence-transformers/all-roberta-large-v1', + 'feature-extraction': [ + 'sentence-transformers/all-distilroberta-v1', + 'sentence-transformers/all-roberta-large-v1', + ], # Text classification - 'roberta-large-mnli', + 'text-classification': [ + 'roberta-large-mnli', + ], # Token classification - 'julien-c/EsperBERTo-small-pos', - ], + 'token-classification': [ + 'julien-c/EsperBERTo-small-pos', + ], + + # Masked language modelling + 'fill-mask': [ + 'roberta-base', + 'distilroberta-base', + ], + }, # 'sam': [ # 'facebook/sam-vit-base', # 'facebook/sam-vit-large', # 'facebook/sam-vit-huge', # ], - 'squeezebert': [ + 'squeezebert': { # Feature extraction - 'squeezebert/squeezebert-uncased', - 'squeezebert/squeezebert-mnli', - ], - 'swin': [ + 'feature-extraction': [ + 'squeezebert/squeezebert-uncased', + 'squeezebert/squeezebert-mnli', + ], + }, + 'swin': { # Image 
classification - 'microsoft/swin-tiny-patch4-window7-224', - 'microsoft/swin-base-patch4-window7-224', - 'microsoft/swin-large-patch4-window12-384-in22k', - 'microsoft/swin-base-patch4-window7-224-in22k', - 'microsoft/swin-base-patch4-window12-384-in22k', - 'microsoft/swin-base-patch4-window12-384', - 'microsoft/swin-large-patch4-window7-224', - 'microsoft/swin-small-patch4-window7-224', - 'microsoft/swin-large-patch4-window7-224-in22k', - 'microsoft/swin-large-patch4-window12-384', - ], - 't5': [ - # Text-to-text (Translation/Summarization) - 't5-small', - 't5-base', - 'google/t5-v1_1-small', - 'google/t5-v1_1-base', - 'google/flan-t5-small', - 'google/flan-t5-base', - 'MBZUAI/LaMini-Flan-T5-77M', - 'MBZUAI/LaMini-Flan-T5-248M', - 'MBZUAI/LaMini-Flan-T5-783M', - 'MBZUAI/LaMini-T5-61M', - 'MBZUAI/LaMini-T5-223M', - 'MBZUAI/LaMini-T5-738M', + 'image-classification': [ + 'microsoft/swin-tiny-patch4-window7-224', + 'microsoft/swin-base-patch4-window7-224', + 'microsoft/swin-large-patch4-window12-384-in22k', + 'microsoft/swin-base-patch4-window7-224-in22k', + 'microsoft/swin-base-patch4-window12-384-in22k', + 'microsoft/swin-base-patch4-window12-384', + 'microsoft/swin-large-patch4-window7-224', + 'microsoft/swin-small-patch4-window7-224', + 'microsoft/swin-large-patch4-window7-224-in22k', + 'microsoft/swin-large-patch4-window12-384', + ], + }, + 't5': { + # Translation/Summarization + ('translation', 'summarization'): [ + 't5-small', + 't5-base', + 'google/t5-v1_1-small', + 'google/t5-v1_1-base', + 'google/flan-t5-small', + 'google/flan-t5-base', + ], + + # Text-to-text + 'text2text-generation': [ + 'MBZUAI/LaMini-Flan-T5-77M', + 'MBZUAI/LaMini-Flan-T5-248M', + 'MBZUAI/LaMini-Flan-T5-783M', + 'MBZUAI/LaMini-T5-61M', + 'MBZUAI/LaMini-T5-223M', + 'MBZUAI/LaMini-T5-738M', + ], # Feature extraction - 'sentence-transformers/sentence-t5-large', - 'hkunlp/instructor-base', - 'hkunlp/instructor-large', - ], - 'vision-encoder-decoder': [ - # Text-to-image - 'nlpconnect/vit-gpt2-image-captioning', - ], - 'vit': [ + 'feature-extraction': [ + 'sentence-transformers/sentence-t5-large', + 'hkunlp/instructor-base', + 'hkunlp/instructor-large', + ], + }, + 'vision-encoder-decoder': { + # Image-to-text + 'image-to-text': [ + 'nlpconnect/vit-gpt2-image-captioning', + ], + }, + 'vit': { # Feature extraction - 'google/vit-base-patch16-224-in21k', - 'facebook/dino-vitb16', - 'facebook/dino-vits8', - 'facebook/dino-vitb8', - 'facebook/dino-vits16', - + 'feature-extraction': [ + 'google/vit-base-patch16-224-in21k', + 'facebook/dino-vitb16', + 'facebook/dino-vits8', + 'facebook/dino-vitb8', + 'facebook/dino-vits16', + ], # Image classification - 'google/vit-base-patch16-224', - ], - 'wav2vec2': [ + 'image-classification': [ + 'google/vit-base-patch16-224', + ], + }, + 'wav2vec2': { # Feature extraction # NOTE: requires --task feature-extraction - 'facebook/mms-300m', - 'facebook/mms-1b', + 'feature-extraction': [ + 'facebook/mms-300m', + 'facebook/mms-1b', + ], # Audio classification - 'alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech', - 'superb/wav2vec2-base-superb-ks', - 'facebook/mms-lid-126', - 'facebook/mms-lid-256', - 'facebook/mms-lid-512', - 'facebook/mms-lid-1024', - 'facebook/mms-lid-2048', - 'facebook/mms-lid-4017', + 'audio-classification': [ + 'alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech', + 'superb/wav2vec2-base-superb-ks', + 'facebook/mms-lid-126', + 'facebook/mms-lid-256', + 'facebook/mms-lid-512', + 'facebook/mms-lid-1024', + 'facebook/mms-lid-2048', + 
'facebook/mms-lid-4017', + ], # Automatic speech recognition - 'jonatasgrosman/wav2vec2-large-xlsr-53-english', - 'facebook/wav2vec2-base-960h', - 'facebook/mms-1b-l1107', - 'facebook/mms-1b-all', - 'facebook/mms-1b-fl102', - ], - 'wavlm': [ + 'automatic-speech-recognition': [ + 'jonatasgrosman/wav2vec2-large-xlsr-53-english', + 'facebook/wav2vec2-base-960h', + 'facebook/mms-1b-l1107', + 'facebook/mms-1b-all', + 'facebook/mms-1b-fl102', + ], + }, + 'wavlm': { # Feature extraction - 'microsoft/wavlm-base', - 'microsoft/wavlm-base-plus', - 'microsoft/wavlm-large', - ], - 'whisper': [ + 'feature-extraction': [ + 'microsoft/wavlm-base', + 'microsoft/wavlm-base-plus', + 'microsoft/wavlm-large', + ], + }, + 'whisper': { # Automatic speech recognition - 'openai/whisper-tiny', - 'openai/whisper-tiny.en', - 'openai/whisper-base', - 'openai/whisper-base.en', - 'openai/whisper-small', - 'openai/whisper-small.en', - 'openai/whisper-medium', - 'openai/whisper-medium.en', - 'openai/whisper-large', - 'openai/whisper-large-v2', - 'NbAiLab/nb-whisper-tiny-beta', - 'NbAiLab/nb-whisper-base-beta', - 'NbAiLab/nb-whisper-small-beta', - 'NbAiLab/nb-whisper-medium-beta', - 'NbAiLab/nb-whisper-large-beta', - ], - 'xlm': [ - 'xlm-clm-ende-1024', - 'xlm-mlm-ende-1024', - 'xlm-clm-enfr-1024', - 'xlm-mlm-enfr-1024', - 'xlm-mlm-17-1280', - 'xlm-mlm-100-1280', - 'xlm-mlm-en-2048', - 'xlm-mlm-enro-1024', - 'xlm-mlm-tlm-xnli15-1024', - 'xlm-mlm-xnli15-1024', - ], - 'xlm-roberta': [ + 'automatic-speech-recognition': [ + 'openai/whisper-tiny', + 'openai/whisper-tiny.en', + 'openai/whisper-base', + 'openai/whisper-base.en', + 'openai/whisper-small', + 'openai/whisper-small.en', + 'openai/whisper-medium', + 'openai/whisper-medium.en', + 'openai/whisper-large', + 'openai/whisper-large-v2', + 'NbAiLab/nb-whisper-tiny-beta', + 'NbAiLab/nb-whisper-base-beta', + 'NbAiLab/nb-whisper-small-beta', + 'NbAiLab/nb-whisper-medium-beta', + 'NbAiLab/nb-whisper-large-beta', + ] + }, + 'xlm': { + # Masked language modelling + 'fill-mask': [ + 'xlm-clm-ende-1024', + 'xlm-mlm-ende-1024', + 'xlm-clm-enfr-1024', + 'xlm-mlm-enfr-1024', + 'xlm-mlm-17-1280', + 'xlm-mlm-100-1280', + 'xlm-mlm-en-2048', + 'xlm-mlm-enro-1024', + 'xlm-mlm-tlm-xnli15-1024', + 'xlm-mlm-xnli15-1024', + ], + }, + 'xlm-roberta': { # Masked language modelling - 'xlm-roberta-base' - ], - 'yolos': [ + 'fill-mask': [ + 'xlm-roberta-base' + ], + }, + 'yolos': { # Object detection - 'hustvl/yolos-tiny', - 'hustvl/yolos-small', - 'hustvl/yolos-base', - 'hustvl/yolos-small-dwr', - 'hustvl/yolos-small-300', - ] + 'object-detection': [ + # Object detection + 'hustvl/yolos-tiny', + 'hustvl/yolos-small', + 'hustvl/yolos-base', + 'hustvl/yolos-small-dwr', + 'hustvl/yolos-small-300', + ], + }, } def main(): - for model_type, model_ids in SUPPORTED_MODELS.items(): - print(f'# {model_type:=^80}') - for model_id in model_ids: - print( - f'python -m scripts.convert --quantize --model_id {model_id}') - print() + for model_type, tasks in SUPPORTED_MODELS.items(): + for task, model_ids in tasks.items(): + print(f'# {model_type:=^80}') + for model_id in model_ids: + print( + f'python -m scripts.convert --quantize --model_id {model_id}') + print() if __name__ == '__main__': diff --git a/tests/generate_tests.py b/tests/generate_tests.py index 37fd5e43b..fc010a0d6 100644 --- a/tests/generate_tests.py +++ b/tests/generate_tests.py @@ -100,12 +100,19 @@ } +FLATTENED_SUPPORTED_MODELS = [ + (model_type, [ + model for task_models in tasks.values() for model in task_models + ]) for model_type, tasks in 
SUPPORTED_MODELS.items() +] + + def generate_tokenizer_tests(): results = {} - tokenizers_to_test = list(SUPPORTED_MODELS.items()) + \ - list(ADDITIONAL_TOKENIZERS_TO_TEST.items()) + tokenizers_to_test = FLATTENED_SUPPORTED_MODELS + tokenizers_to_test += list(ADDITIONAL_TOKENIZERS_TO_TEST.items()) for model_type, tokenizer_names in tokenizers_to_test: if model_type in MODELS_TO_IGNORE: @@ -169,7 +176,7 @@ def generate_tokenizer_tests(): def generate_config_tests(): results = {} - for model_type, config_names in SUPPORTED_MODELS.items(): + for model_type, config_names in FLATTENED_SUPPORTED_MODELS: print(f'Generating tests for {model_type}') for config_name in config_names: From 4245e26fd5d88461ec4d36b0ec4e830394550098 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 18 Sep 2023 15:53:41 +0200 Subject: [PATCH 03/14] Update example model id --- README.md | 2 +- docs/snippets/1_quick-tour.snippet | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6e34328ef..ada64d883 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ let out = await pipe('I love transformers!'); You can also use a different model by specifying the model id or path as the second argument to the `pipeline` function. For example: ```javascript // Use a different model for sentiment-analysis -let pipe = await pipeline('sentiment-analysis', 'nlptown/bert-base-multilingual-uncased-sentiment'); +let pipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment'); ``` diff --git a/docs/snippets/1_quick-tour.snippet b/docs/snippets/1_quick-tour.snippet index 2f2fa58d9..dec6b341f 100644 --- a/docs/snippets/1_quick-tour.snippet +++ b/docs/snippets/1_quick-tour.snippet @@ -40,5 +40,5 @@ let out = await pipe('I love transformers!'); You can also use a different model by specifying the model id or path as the second argument to the `pipeline` function. 
For example: ```javascript // Use a different model for sentiment-analysis -let pipe = await pipeline('sentiment-analysis', 'nlptown/bert-base-multilingual-uncased-sentiment'); +let pipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment'); ``` From 5b79108b5d6df4798b468652fde88083dea79ca8 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 19 Sep 2023 02:34:55 +0200 Subject: [PATCH 04/14] Update list of supported models --- scripts/supported_models.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/supported_models.py b/scripts/supported_models.py index 928728eaa..c645aee88 100644 --- a/scripts/supported_models.py +++ b/scripts/supported_models.py @@ -77,6 +77,12 @@ 'BAAI/bge-small-en', 'BAAI/bge-base-en', 'BAAI/bge-large-en', + 'BAAI/bge-large-en-v1.5', + 'BAAI/bge-base-en-v1.5', + 'BAAI/bge-small-en-v1.5', + 'BAAI/bge-large-zh-v1.5', + 'BAAI/bge-base-zh-v1.5', + 'BAAI/bge-small-zh-v1.5', 'allenai/scibert_scivocab_uncased', 'SpanBERT/spanbert-large-cased', 'SpanBERT/spanbert-base-cased', @@ -92,9 +98,10 @@ 'nlptown/bert-base-multilingual-uncased-sentiment', 'ProsusAI/finbert', 'unitary/toxic-bert', + 'BAAI/bge-reranker-large', + 'BAAI/bge-reranker-base', ], - # Token classification 'token-classification': [ 'Davlan/bert-base-multilingual-cased-ner-hrl', @@ -133,8 +140,6 @@ ], }, - - 'camembert': { # Feature extraction 'feature-extraction': [ From dc0e881934b8bf748e77e24e893a09f47a6c1c44 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 19 Sep 2023 03:02:46 +0200 Subject: [PATCH 05/14] Update generate_tests.py --- tests/generate_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/generate_tests.py b/tests/generate_tests.py index fc010a0d6..4614d9c02 100644 --- a/tests/generate_tests.py +++ b/tests/generate_tests.py @@ -111,8 +111,8 @@ def generate_tokenizer_tests(): results = {} - tokenizers_to_test = FLATTENED_SUPPORTED_MODELS - tokenizers_to_test += list(ADDITIONAL_TOKENIZERS_TO_TEST.items()) + tokenizers_to_test = FLATTENED_SUPPORTED_MODELS + \ + list(ADDITIONAL_TOKENIZERS_TO_TEST.items()) for model_type, tokenizer_names in tokenizers_to_test: if model_type in MODELS_TO_IGNORE: From ea13e6d63fc242f2d8b41321c4d8eb4db5860605 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 19 Sep 2023 03:14:20 +0200 Subject: [PATCH 06/14] Remove requirement of `output_attentions` revision --- src/pipelines.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pipelines.js b/src/pipelines.js index e2be02fcf..553281bf1 100644 --- a/src/pipelines.js +++ b/src/pipelines.js @@ -1054,9 +1054,7 @@ export class AudioClassificationPipeline extends Pipeline { * **Example:** Transcribe English w/ word-level timestamps. 
* ```javascript * let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav'; - * let transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { - * revision: 'output_attentions', - * }); + * let transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en'); * let output = await transcriber(url, { return_timestamps: 'word' }); * // { * // "text": " And so my fellow Americans ask not what your country can do for you ask what you can do for your country.", From 288531d5a4836d3556273df36044be96c5905df8 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 4 Oct 2023 18:36:38 +0200 Subject: [PATCH 07/14] Add demo site to examples section (closes #233) --- README.md | 2 ++ docs/snippets/3_examples.snippet | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 67474c2b9..18d846253 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,8 @@ Want to jump straight in? Get started with one of our sample applications/templa | Next.js (client-side) | Sentiment analysis (in-browser inference) | [code](./examples/next-client/), [demo](https://huggingface.co/spaces/Xenova/next-example-app) | | Next.js (server-side) | Sentiment analysis (Node.js inference) | [code](./examples/next-server/), [demo](https://huggingface.co/spaces/Xenova/next-server-example-app) | | Node.js | Sentiment analysis API | [code](./examples/node/) | +| Demo site | A collection of demos | [code](./examples/demo-site/), [demo](https://xenova.github.io/transformers.js/) | + ## Custom usage diff --git a/docs/snippets/3_examples.snippet b/docs/snippets/3_examples.snippet index 009494342..1f35c83b0 100644 --- a/docs/snippets/3_examples.snippet +++ b/docs/snippets/3_examples.snippet @@ -14,3 +14,5 @@ Want to jump straight in? Get started with one of our sample applications/templa | Next.js (client-side) | Sentiment analysis (in-browser inference) | [code](./examples/next-client/), [demo](https://huggingface.co/spaces/Xenova/next-example-app) | | Next.js (server-side) | Sentiment analysis (Node.js inference) | [code](./examples/next-server/), [demo](https://huggingface.co/spaces/Xenova/next-server-example-app) | | Node.js | Sentiment analysis API | [code](./examples/node/) | +| Demo site | A collection of demos | [code](./examples/demo-site/), [demo](https://xenova.github.io/transformers.js/) | + From cddd37c2979e34f695e6a055768cc0e3e2af3c3f Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 4 Oct 2023 18:37:07 +0200 Subject: [PATCH 08/14] Fix typo --- scripts/supported_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/supported_models.py b/scripts/supported_models.py index 1adb2db33..e36a6792a 100644 --- a/scripts/supported_models.py +++ b/scripts/supported_models.py @@ -370,7 +370,7 @@ 'voidful/long-t5-encodec-tglobal-base', ], }, - 'm2m_100': [ + 'm2m_100': { # Translation 'translation': [ 'facebook/nllb-200-distilled-600M', From 113485233e001c0d7cd37e9a9b4ff7c6fd50d318 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 4 Oct 2023 18:41:33 +0200 Subject: [PATCH 09/14] Include examples in docs index --- docs/source/index.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/index.md b/docs/source/index.md index 03e496e60..1b94c115f 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -23,6 +23,14 @@ The documentation is organized into 4 sections: 3. **DEVELOPER GUIDES** show you how to use the library to achieve a specific goal. 4. 
**API REFERENCE** describes all classes and functions, as well as their available parameters and types. +## Examples + + +{ + "path": "../snippets/3_examples.snippet" +} + + ## Supported tasks/models Here is the list of all tasks and architectures currently supported by Transformers.js. From 09032bc18a6c58b5bfa0d16b8f48a32bea941525 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 6 Dec 2023 16:04:05 +0200 Subject: [PATCH 10/14] Update github issue templates --- .github/ISSUE_TEMPLATE/1_bug-report.md | 40 --------------- .github/ISSUE_TEMPLATE/1_bug-report.yml | 51 ++++++++++++++++++++ .github/ISSUE_TEMPLATE/2_feature-request.md | 26 ---------- .github/ISSUE_TEMPLATE/2_new_model.yml | 40 +++++++++++++++ .github/ISSUE_TEMPLATE/3_new_pipeline.yml | 40 +++++++++++++++ .github/ISSUE_TEMPLATE/3_question.md | 10 ---- .github/ISSUE_TEMPLATE/4_feature-request.yml | 31 ++++++++++++ .github/ISSUE_TEMPLATE/5_question.yml | 13 +++++ 8 files changed, 175 insertions(+), 76 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/1_bug-report.md create mode 100644 .github/ISSUE_TEMPLATE/1_bug-report.yml delete mode 100644 .github/ISSUE_TEMPLATE/2_feature-request.md create mode 100644 .github/ISSUE_TEMPLATE/2_new_model.yml create mode 100644 .github/ISSUE_TEMPLATE/3_new_pipeline.yml delete mode 100644 .github/ISSUE_TEMPLATE/3_question.md create mode 100644 .github/ISSUE_TEMPLATE/4_feature-request.yml create mode 100644 .github/ISSUE_TEMPLATE/5_question.yml diff --git a/.github/ISSUE_TEMPLATE/1_bug-report.md b/.github/ISSUE_TEMPLATE/1_bug-report.md deleted file mode 100644 index 62b3d08a2..000000000 --- a/.github/ISSUE_TEMPLATE/1_bug-report.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: "[Bug] Title goes here." -labels: bug -assignees: '' - ---- - -**Describe the bug** -*A clear and concise description of what the bug is.* - - - - -**How to reproduce** -*Steps or a minimal working example to reproduce the behavior* - - - - -**Expected behavior** -*A clear and concise description of what you expected to happen.* - - - -**Logs/screenshots** -*If applicable, add logs/screenshots to help explain your problem.* - -**Environment** -- Transformers.js version: -- Browser (if applicable): -- Operating system (if applicable): -- Other: - - -**Additional context** -*Add any other context about the problem here.* - - diff --git a/.github/ISSUE_TEMPLATE/1_bug-report.yml b/.github/ISSUE_TEMPLATE/1_bug-report.yml new file mode 100644 index 000000000..70ceedd96 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1_bug-report.yml @@ -0,0 +1,51 @@ +name: "🐛 Bug Report" +description: Submit a bug report to help us improve transformers.js +labels: [ "bug" ] +body: + - type: textarea + id: system-info + attributes: + label: System Info + description: Please share your system info with us. If you are using other JS libraries/frameworks (e.g., React or Next.js), please include their versions too. + placeholder: transformers.js version, browser (if applicable), operating system, Node.js version, bundlers, ... 
+ validations: + required: true + + - type: checkboxes + id: environment + attributes: + label: Environment + description: "The environment I am running in:" + options: + - label: "Website/web-app" + - label: "Browser extension" + - label: "Server-side (e.g., Node.js, Deno, Bun)" + - label: "Desktop app (e.g., Electron)" + - label: "Other (e.g., VSCode extension)" + + - type: textarea + id: description + validations: + required: true + attributes: + label: Description + description: A clear and concise description of the bug, as well as what you expected to happen. + + - type: textarea + id: reproduction + validations: + required: true + attributes: + label: Reproduction + description: | + Please provide a code sample that reproduces the problem you ran into. + If you have code snippets, error messages, stack traces please provide them here as well. + Important! Use [code tags](https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting) to correctly format your code. + + placeholder: | + Steps to reproduce the behavior: + + 1. + 2. + 3. + diff --git a/.github/ISSUE_TEMPLATE/2_feature-request.md b/.github/ISSUE_TEMPLATE/2_feature-request.md deleted file mode 100644 index 235519115..000000000 --- a/.github/ISSUE_TEMPLATE/2_feature-request.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: Feature request -about: Suggest a new feature (e.g., model, pipeline, task) for this project -title: "[Feature request] Title goes here." -labels: enhancement -assignees: '' - ---- - -**Name of the feature** -*In general, the feature you want added should be supported by HuggingFace's [transformers](https://github.com/huggingface/transformers) library:* - - *If requesting a **model**, it must be listed [here](https://huggingface.co/docs/transformers/index#supported-models).* - - *If requesting a **pipeline**, it must be listed [here](https://huggingface.co/docs/transformers/main_classes/pipelines).* -- *If requesting a **task**, it must be listed [here](https://huggingface.co/tasks).* - - - - -**Reason for request** -*Why is it important that we add this feature? What is your intended use case? Remember, we are more likely to add support for models/pipelines/tasks that are popular (e.g., many downloads), or contain functionality that does not exist (e.g., new input type).* - - - - -**Additional context** -*Add any other context or screenshots about the feature request here.* diff --git a/.github/ISSUE_TEMPLATE/2_new_model.yml b/.github/ISSUE_TEMPLATE/2_new_model.yml new file mode 100644 index 000000000..5b7fe3014 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2_new_model.yml @@ -0,0 +1,40 @@ +name: "🌟 New model addition" +description: Submit a proposal/request to implement a new model +labels: [ "New model" ] + +body: + - type: textarea + id: description-request + validations: + required: true + attributes: + label: Model description + description: | + Include important information about the model. + + - type: checkboxes + id: information-tasks + attributes: + label: Prerequisites + description: | + Please note that Transformers.js relies on the model first being supported in [🤗 Transformers](https://github.com/huggingface/transformers) and [🤗 Optimum](https://github.com/huggingface/optimum). If the model you are requesting is not yet supported by either of them, feel free to open up a model request there too. 
+ options: + - label: "The model is supported in Transformers (i.e., listed [here](https://huggingface.co/docs/transformers/index#supported-models-and-frameworks))" + - label: "The model can be exported to ONNX with Optimum (i.e., listed [here](https://huggingface.co/docs/optimum/main/en/exporters/onnx/overview))" + + - type: textarea + id: additional-info + attributes: + label: Additional information + description: | + Please provide additional information about the model here. + If the model is already supported in Transformers, you can provide example Python code to help ensure the JavaScript implementation (and output) matches the original version. + + - type: textarea + id: contribution + validations: + required: true + attributes: + label: Your contribution + description: | + Is there any way that you could help, e.g. by submitting a PR? diff --git a/.github/ISSUE_TEMPLATE/3_new_pipeline.yml b/.github/ISSUE_TEMPLATE/3_new_pipeline.yml new file mode 100644 index 000000000..ff7b8bff7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3_new_pipeline.yml @@ -0,0 +1,40 @@ +name: "🔧 New pipeline addition" +description: Submit a proposal/request to implement a new pipeline +labels: [ "New pipeline" ] + +body: + - type: textarea + id: description-request + validations: + required: true + attributes: + label: Pipeline description + description: | + Put any and all important information related to the pipeline. + + - type: checkboxes + id: information-tasks + attributes: + label: Prerequisites + description: | + Please note that Transformers.js relies on the pipeline first being supported in [🤗 Transformers](https://github.com/huggingface/transformers). If the pipeline you are requesting is not yet supported by Transformers, feel free to open up a feature request for it there too. + options: + - label: "The pipeline is supported in Transformers (i.e., listed [here](https://huggingface.co/docs/transformers/main_classes/pipelines))" + - label: "The task is listed [here](https://huggingface.co/tasks)" + + - type: textarea + id: additional-info + attributes: + label: Additional information + description: | + Please provide additional information about the pipeline here. + If the pipeline is already supported in Transformers, you can provide example Python code to help ensure the JavaScript implementation (and output) matches the original version. + + - type: textarea + id: contribution + validations: + required: true + attributes: + label: Your contribution + description: | + Is there any way that you could help, e.g. by submitting a PR? diff --git a/.github/ISSUE_TEMPLATE/3_question.md b/.github/ISSUE_TEMPLATE/3_question.md deleted file mode 100644 index d8beec23e..000000000 --- a/.github/ISSUE_TEMPLATE/3_question.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Ask a question -about: Ask a question about the library -title: "[Question] Title goes here." -labels: question -assignees: '' - ---- - - diff --git a/.github/ISSUE_TEMPLATE/4_feature-request.yml b/.github/ISSUE_TEMPLATE/4_feature-request.yml new file mode 100644 index 000000000..cda6f9f2a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/4_feature-request.yml @@ -0,0 +1,31 @@ +name: "\U0001F680 Feature request" +description: Submit a proposal/request for a new transformers.js feature +labels: [ "feature" ] +body: + - type: textarea + id: feature-request + validations: + required: true + attributes: + label: Feature request + description: | + A clear and concise description of the feature proposal. 
+ If the feature is already part of the python [Transformers](https://github.com/huggingface/transformers) library, please provide relevant links or example usage. + + - type: textarea + id: motivation + validations: + required: true + attributes: + label: Motivation + description: | + Please outline the motivation for the proposal. Why is it important that we add this feature? What is your intended use case? + + - type: textarea + id: contribution + validations: + required: true + attributes: + label: Your contribution + description: | + Is there any way that you could help, e.g. by submitting a PR? diff --git a/.github/ISSUE_TEMPLATE/5_question.yml b/.github/ISSUE_TEMPLATE/5_question.yml new file mode 100644 index 000000000..2af3acbcf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/5_question.yml @@ -0,0 +1,13 @@ +name: "🙋 Question" +description: Ask a question about the library +labels: [ "question" ] + +body: + - type: textarea + id: question + validations: + required: true + attributes: + label: Question + description: | + Please enter your question here... From b484d930ad192fff0a92541795893c9999dd15c3 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 6 Dec 2023 16:22:47 +0200 Subject: [PATCH 11/14] Create config.yml --- .github/ISSUE_TEMPLATE/config.yml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/config.yml diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..d071e5961 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,9 @@ +blank_issues_enabled: true +version: 2.1 +contact_links: + - name: Models on the Hugging Face Hub + url: https://huggingface.co/models?library=transformers.js + about: Open a Pull request / Discussion related to a specific model checkpoint directly on the Hugging Face Hub + - name: Documentation + url: https://huggingface.co/docs/transformers.js + about: View the Transformers.js documentation From d9aab4ce2186388b71e3bf8f92ad21ddbb31510f Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 6 Dec 2023 16:31:05 +0200 Subject: [PATCH 12/14] Order supported models --- scripts/supported_models.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/scripts/supported_models.py b/scripts/supported_models.py index 26a46cbbd..0a7eafa3c 100644 --- a/scripts/supported_models.py +++ b/scripts/supported_models.py @@ -173,17 +173,6 @@ 'airesearch/wangchanberta-base-att-spm-uncased', ], }, - - 'clip': { - # Zero-shot image classification (and feature extraction) - # (with and without `--split_modalities`) - 'zero-shot-image-classification': [ - 'openai/clip-vit-base-patch16', - 'openai/clip-vit-base-patch32', - 'openai/clip-vit-large-patch14', - 'openai/clip-vit-large-patch14-336', - ] - }, 'clap': { # Zero-shot audio classification and feature extraction # (with and without `--split_modalities`) @@ -195,6 +184,16 @@ # 'Xenova/tiny-random-ClapModel', } }, + 'clip': { + # Zero-shot image classification (and feature extraction) + # (with and without `--split_modalities`) + 'zero-shot-image-classification': [ + 'openai/clip-vit-base-patch16', + 'openai/clip-vit-base-patch32', + 'openai/clip-vit-large-patch14', + 'openai/clip-vit-large-patch14-336', + ] + }, 'codegen': { # Text generation 'text-generation': [ From cba5b8d9a47b9fa3b494a9ac146b4cb2de0dfae4 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 6 Dec 2023 16:32:44 +0200 Subject: [PATCH 13/14] Cleanup --- scripts/supported_models.py | 20 
++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/supported_models.py b/scripts/supported_models.py index 0a7eafa3c..fca98bc9e 100644 --- a/scripts/supported_models.py +++ b/scripts/supported_models.py @@ -129,21 +129,21 @@ 'bert-base-multilingual-cased', 'bert-base-chinese', 'emilyalsentzer/Bio_ClinicalBERT', - ] + ], }, 'blenderbot': { # Text-to-text (TODO add conversational) 'text2text-generation': [ 'facebook/blenderbot-400M-distill', # 'facebook/blenderbot-1B-distill', - ] + ], }, 'blenderbot-small': { # Text-to-text (TODO add conversational) 'text2text-generation': [ # 'facebook/blenderbot-90M', # DEPRECATED 'facebook/blenderbot_small-90M', - ] + ], }, 'bloom': { # Text generation @@ -192,7 +192,7 @@ 'openai/clip-vit-base-patch32', 'openai/clip-vit-large-patch14', 'openai/clip-vit-large-patch14-336', - ] + ], }, 'codegen': { # Text generation @@ -351,7 +351,7 @@ 'text-generation': [ 'Rocketknight1/tiny-random-falcon-7b', 'fxmarty/really-tiny-falcon-testing', - ] + ], }, 'glpn': { # Depth estimation @@ -430,7 +430,7 @@ 'RajuKandasamy/tamillama_tiny_30m', 'JackFram/llama-68m', 'JackFram/llama-160m', - ] + ], }, 'longt5': { # Text-to-text @@ -510,7 +510,7 @@ # Text generation 'text-generation': [ 'efederici/ipt-350m', - ] + ], }, 'mpnet': { # Feature extraction @@ -545,7 +545,7 @@ 'facebook/opt-350m', # (TODO conversational) 'PygmalionAI/pygmalion-350m', - ] + ], }, 'owlvit': { # Object detection (Zero-shot object detection) @@ -674,7 +674,7 @@ 'microsoft/trocr-base-printed', 'microsoft/trocr-small-handwritten', 'microsoft/trocr-base-handwritten', - ] + ], }, 'vision-encoder-decoder': { # Image-to-text @@ -750,7 +750,7 @@ 'NbAiLab/nb-whisper-small-beta', 'NbAiLab/nb-whisper-medium-beta', 'NbAiLab/nb-whisper-large-beta', - ] + ], }, 'xlm': { # Masked language modelling From d7ecd97ef7012e5b1bb0a01c1ab7066375d11c42 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 6 Dec 2023 16:38:33 +0200 Subject: [PATCH 14/14] Update 4_feature-request.yml --- .github/ISSUE_TEMPLATE/4_feature-request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/4_feature-request.yml b/.github/ISSUE_TEMPLATE/4_feature-request.yml index cda6f9f2a..0ef12f408 100644 --- a/.github/ISSUE_TEMPLATE/4_feature-request.yml +++ b/.github/ISSUE_TEMPLATE/4_feature-request.yml @@ -1,4 +1,4 @@ -name: "\U0001F680 Feature request" +name: "🚀 Feature request" description: Submit a proposal/request for a new transformers.js feature labels: [ "feature" ] body:
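
Note on the refactor in the second patch (`scripts/supported_models.py`): entries are now keyed first by model type and then by task, and `tests/generate_tests.py` flattens that mapping back into `(model_type, [model_ids])` pairs for the tokenizer/config tests. A minimal, self-contained sketch of the new shape and of that flattening — using a hypothetical two-entry subset rather than the full list from the patch:

```python
# Sketch of the structure introduced in this series:
# model_type -> task -> list of Hugging Face model IDs.
SUPPORTED_MODELS = {
    'albert': {
        'fill-mask': ['albert-base-v2'],
        'feature-extraction': ['sentence-transformers/paraphrase-albert-small-v2'],
    },
    'whisper': {
        'automatic-speech-recognition': ['openai/whisper-tiny.en'],
    },
}

# Same flattening as FLATTENED_SUPPORTED_MODELS in tests/generate_tests.py:
# collapse the per-task lists so each model type maps to one flat list of model IDs.
FLATTENED_SUPPORTED_MODELS = [
    (model_type, [model for task_models in tasks.values() for model in task_models])
    for model_type, tasks in SUPPORTED_MODELS.items()
]

print(FLATTENED_SUPPORTED_MODELS)
# [('albert', ['albert-base-v2', 'sentence-transformers/paraphrase-albert-small-v2']),
#  ('whisper', ['openai/whisper-tiny.en'])]
```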