Issue in Inference #49

Open
sushgandhi opened this issue Feb 8, 2023 · 0 comments
Just experimenting with this. I installed tner and tried to load the model.

I'm getting an error that the tokenizer file cannot be found; the same error occurs when loading the tokenizer directly with the transformers library from https://huggingface.co/tner/deberta-v3-large-fin
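
For reference, a minimal reproduction sketch of both failing paths (this assumes tner and transformers are installed; the import path `from tner import TransformersNER` follows the tner README):

```python
from tner import TransformersNER
from transformers import AutoTokenizer

# Loading through tner fails while it builds the tokenizer:
model = TransformersNER("tner/deberta-v3-large-fin")

# The same error occurs when loading the tokenizer directly with transformers:
tokenizer = AutoTokenizer.from_pretrained("tner/deberta-v3-large-fin")
```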

```
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
Cell In[16], line 1
----> 1 model = TransformersNER("tner/deberta-v3-large-fin")

File ~file_path/lib/python3.8/site-packages/tner/ner_model.py:103, in TransformersNER.__init__(self, model, max_length, crf, use_auth_token, label2id, non_entity_symbol)
101 # load pre processor
102 if self.crf_layer is not None:
--> 103 self.tokenizer = NERTokenizer(
104 self.model_name,
105 id2label=self.id2label,
106 padding_id=self.label2id[self.non_entity_symbol],
107 use_auth_token=use_auth_token)
108 else:
109 self.tokenizer = NERTokenizer(self.model_name, id2label=self.id2label, use_auth_token=use_auth_token)

File ~file_path/lib/python3.8/site-packages/tner/ner_tokenizer.py:40, in NERTokenizer.__init__(self, tokenizer_name, id2label, padding_id, use_auth_token, is_xlnet)
37 self.tokenizer = AutoTokenizer.from_pretrained(
38 tokenizer_name, use_auth_token=use_auth_token)
39 except Exception:
---> 40 self.tokenizer = AutoTokenizer.from_pretrained(
41 tokenizer_name, use_auth_token=use_auth_token, local_files_only=True)
42 if self.tokenizer.pad_token is None:
43 self.tokenizer.pad_token = PAD_TOKEN_LABEL_ID

File ~file_path/lib/python3.8/site-packages/transformers/models/auto/tokenization_auto.py:658, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
654 if tokenizer_class is None:
655 raise ValueError(
656 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
657 )
--> 658 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
660 # Otherwise we have to be creative.
661 # if model is an encoder decoder, the encoder tokenizer class is used by default
662 if isinstance(config, EncoderDecoderConfig):

File ~file_path/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1804, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1801 else:
1802 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1804 return cls._from_pretrained(
1805 resolved_vocab_files,
1806 pretrained_model_name_or_path,
1807 init_configuration,
1808 *init_inputs,
1809 use_auth_token=use_auth_token,
1810 cache_dir=cache_dir,
1811 local_files_only=local_files_only,
1812 _commit_hash=commit_hash,
1813 **kwargs,
1814 )

File ~file_path/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1959, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
1957 # Instantiate tokenizer.
1958 try:
-> 1959 tokenizer = cls(*init_inputs, **init_kwargs)
1960 except OSError:
1961 raise OSError(
1962 "Unable to load vocabulary from file. "
1963 "Please check that the provided vocabulary is accessible and not corrupted."
1964 )

File ~file_path/lib/python3.8/site-packages/transformers/models/deberta_v2/tokenization_deberta_v2_fast.py:133, in DebertaV2TokenizerFast.__init__(self, vocab_file, tokenizer_file, do_lower_case, split_by_punct, bos_token, eos_token, unk_token, sep_token, pad_token, cls_token, mask_token, **kwargs)
118 def __init__(
119 self,
120 vocab_file=None,
(...)
131 **kwargs
132 ) -> None:
--> 133 super().__init__(
134 vocab_file,
135 tokenizer_file=tokenizer_file,
136 do_lower_case=do_lower_case,
137 bos_token=bos_token,
138 eos_token=eos_token,
139 unk_token=unk_token,
140 sep_token=sep_token,
141 pad_token=pad_token,
142 cls_token=cls_token,
143 mask_token=mask_token,
144 split_by_punct=split_by_punct,
145 **kwargs,
146 )
148 self.do_lower_case = do_lower_case
149 self.split_by_punct = split_by_punct

File ~file_path/lib/python3.8/site-packages/transformers/tokenization_utils_fast.py:111, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
108 fast_tokenizer = copy.deepcopy(tokenizer_object)
109 elif fast_tokenizer_file is not None and not from_slow:
110 # We have a serialization from tokenizers which let us directly build the backend
--> 111 fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
112 elif slow_tokenizer is not None:
113 # We need to convert a slow tokenizer to build the backend
114 fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)

Exception: No such file or directory (os error 2)
```
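
The traceback shows the fast tokenizer file (tokenizer.json) could not be resolved for this repo. Not verified against this checkpoint, but a possible workaround to try is forcing the slow, sentencepiece-based tokenizer, which does not need tokenizer.json (this assumes the sentencepiece package is installed and the repo ships the sentencepiece model file):

```python
from transformers import AutoTokenizer

# Force the slow (sentencepiece-based) DeBERTa-v2 tokenizer instead of the
# fast one, so transformers does not look for the missing tokenizer.json.
tokenizer = AutoTokenizer.from_pretrained(
    "tner/deberta-v3-large-fin", use_fast=False
)
```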
