diff --git a/docs/source/en/model_doc/ijepa.md b/docs/source/en/model_doc/ijepa.md
index 9a0cd368a8188f..32944e2617eae1 100644
--- a/docs/source/en/model_doc/ijepa.md
+++ b/docs/source/en/model_doc/ijepa.md
@@ -45,7 +45,7 @@ url_2 = "http://images.cocodataset.org/val2017/000000219578.jpg"
 image_1 = Image.open(requests.get(url_1, stream=True).raw)
 image_2 = Image.open(requests.get(url_2, stream=True).raw)
 
-model_id = "jmtzt/ijepa_vith14_1k"
+model_id = "facebook/ijepa_vith14_1k"
 processor = AutoProcessor.from_pretrained(model_id)
 model = AutoModel.from_pretrained(model_id)
diff --git a/src/transformers/models/ijepa/modular_ijepa.py b/src/transformers/models/ijepa/modular_ijepa.py
index efbd71d91342fd..3b3756dd5ce697 100644
--- a/src/transformers/models/ijepa/modular_ijepa.py
+++ b/src/transformers/models/ijepa/modular_ijepa.py
@@ -155,7 +155,7 @@ def __init__(self, config: IJepaConfig, add_pooling_layer: bool = False, use_mas
         self.embeddings = IJepaEmbeddings(config, use_mask_token=use_mask_token)
 
 
-_IMAGE_CLASS_CHECKPOINT = "jmtzt/ijepa_vith14_1k"
+_IMAGE_CLASS_CHECKPOINT = "facebook/ijepa_vith14_1k"
 _IMAGE_CLASS_EXPECTED_OUTPUT = "Egyptian cat"
 
 
diff --git a/tests/models/ijepa/test_modeling_ijepa.py b/tests/models/ijepa/test_modeling_ijepa.py
index 27a79bc6724285..723ddcf7988826 100644
--- a/tests/models/ijepa/test_modeling_ijepa.py
+++ b/tests/models/ijepa/test_modeling_ijepa.py
@@ -250,7 +250,7 @@ def test_for_image_classification(self):
 
     @slow
     def test_model_from_pretrained(self):
-        model_name = "jmtzt/ijepa_vith14_1k"
+        model_name = "facebook/ijepa_vith14_1k"
         model = IJepaModel.from_pretrained(model_name)
         self.assertIsNotNone(model)
 
@@ -266,11 +266,11 @@ def prepare_img():
 class IJepaModelIntegrationTest(unittest.TestCase):
     @cached_property
     def default_image_processor(self):
-        return ViTImageProcessor.from_pretrained("jmtzt/ijepa_vith14_1k") if is_vision_available() else None
+        return ViTImageProcessor.from_pretrained("facebook/ijepa_vith14_1k") if is_vision_available() else None
 
     @slow
     def test_inference_no_head(self):
-        model = IJepaModel.from_pretrained("jmtzt/ijepa_vith14_1k").to(torch_device)
+        model = IJepaModel.from_pretrained("facebook/ijepa_vith14_1k").to(torch_device)
         image_processor = self.default_image_processor
 
         image = prepare_img()
@@ -299,7 +299,7 @@ def test_inference_fp16(self):
         A small test to make sure that inference work in half precision without any problem.
         """
         model = IJepaModel.from_pretrained(
-            "jmtzt/ijepa_vith14_1k",
+            "facebook/ijepa_vith14_1k",
             torch_dtype=torch.float16,
             device_map="auto",
         )
@@ -319,7 +319,7 @@ def test_inference_interpolate_pos_encoding(self):
         # allowing to interpolate the pre-trained position embeddings in order to use
         # the model on higher resolutions. The DINO model by Facebook AI leverages this
         # to visualize self-attention on higher resolution images.
-        model = IJepaModel.from_pretrained("jmtzt/ijepa_vith14_1k").to(torch_device)
+        model = IJepaModel.from_pretrained("facebook/ijepa_vith14_1k").to(torch_device)
         image_processor = self.default_image_processor
 
         image = prepare_img()