diff --git a/requirements.txt b/requirements.txt
index efd3a19b..f4eef972 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 spacy>=3.5.0,<4.0.0
 numpy>=1.15.0
-transformers>=3.4.0,<4.35.0
+transformers[sentencepiece]>=3.4.0,<4.35.0
 torch>=1.8.0
 srsly>=2.4.0,<3.0.0
 dataclasses>=0.6,<1.0; python_version < "3.7"
diff --git a/spacy_transformers/tests/test_pipeline_component.py b/spacy_transformers/tests/test_pipeline_component.py
index 5b0598f0..984c1f39 100644
--- a/spacy_transformers/tests/test_pipeline_component.py
+++ b/spacy_transformers/tests/test_pipeline_component.py
@@ -238,7 +238,8 @@ def test_transformer_pipeline_tagger_senter_listener():
 def test_transformer_sentencepiece_IO():
     """Test that a transformer using sentencepiece trains + IO goes OK"""
     orig_config = Config().from_str(cfg_string)
-    orig_config["components"]["transformer"]["model"]["name"] = "camembert-base"
+    orig_config["components"]["transformer"]["model"]["name"] = "hf-internal-testing/tiny-xlm-roberta"
+    orig_config["components"]["transformer"]["model"]["tokenizer_config"] = {"use_fast": False}
     nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
     tagger = nlp.get_pipe("tagger")
     tagger_trf = tagger.model.get_ref("tok2vec").layers[0]
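
For context, a minimal sketch (not part of the diff) of what the updated test now depends on: with the `sentencepiece` extra installed, the slow (non-fast) tokenizer for the tiny test model can be loaded directly via `transformers.AutoTokenizer`. The model name and `use_fast=False` flag mirror the test change above; downloading the tiny model from the Hugging Face Hub is assumed.

```python
# Sketch: confirm the sentencepiece-backed slow tokenizer loads.
# Requires transformers[sentencepiece] and network access to the Hub.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "hf-internal-testing/tiny-xlm-roberta", use_fast=False
)
print(tokenizer.tokenize("spaCy meets sentencepiece"))
```

Without the `sentencepiece` extra, loading a slow sentencepiece-based tokenizer like this one fails at import time, which is why the requirements change and the test change go together.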