Skip to content

Commit

Permalink
Docstring for accents
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipMay committed Aug 1, 2020
1 parent 9dd0028 commit ce4b869
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions model/tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,13 @@ class FullTokenizer(object):
"""Runs end-to-end tokenization."""

def __init__(self, vocab_file, do_lower_case=True, strip_accents=True):
"""Constructs a FullTokenizer.
Args:
vocab_file: The vocabulary file.
do_lower_case: Whether to lower case the input.
strip_accents: Whether to strip the accents.
"""
self.vocab = load_vocab(vocab_file)
self.inv_vocab = {v: k for k, v in self.vocab.items()}
self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, strip_accents=strip_accents)
Expand Down

0 comments on commit ce4b869

Please sign in to comment.