diff --git a/src/tokenizers.js b/src/tokenizers.js index 030acfec7..8cee6f2dc 100644 --- a/src/tokenizers.js +++ b/src/tokenizers.js @@ -2744,6 +2744,12 @@ export class PreTrainedTokenizer extends Callable { x = this.normalizer(x); } + // If, after normalization, this section is empty (e.g., trimming whitespace), + // we return an empty array + if (x.length === 0) { + return []; + } + const sectionTokens = (this.pre_tokenizer !== null) ? this.pre_tokenizer(x, { section_index, }) : [x];