Skip to content

Commit

Permalink
Update comments for tokenizer changes
Browse files Browse the repository at this point in the history
Signed-off-by: Alex-Brooks <[email protected]>
  • Loading branch information
alex-jw-brooks committed Sep 29, 2023
1 parent ed1f8e6 commit 579db79
Showing 1 changed file with 3 additions and 10 deletions.
13 changes: 3 additions & 10 deletions caikit_nlp/modules/text_generation/peft_prompt_tuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from tqdm import tqdm
from transformers import (
AutoModelForCausalLM,
DataCollatorForLanguageModeling,
default_data_collator,
)
from transformers.models.auto.tokenization_auto import AutoTokenizer
Expand Down Expand Up @@ -890,13 +889,8 @@ def _get_collate_fn(tokenizer: AutoTokenizer, task_type: str) -> Callable:
Callable
collate_fn to be used for processing batches from our datasets.
"""
# HACK: Do NOT use the causal LM collator (for now) because we want to set the labels ourselves...
# if task_type == "CAUSAL_LM":
# return DataCollatorForLanguageModeling(
# tokenizer=tokenizer,
# return_tensors="pt",
# mlm=False,
# )
# HACK: Do NOT use the causal LM collator (for now) because we
# want to set the labels ourselves. TODO: centralize collator management.
return default_data_collator

@staticmethod
Expand Down Expand Up @@ -944,8 +938,7 @@ def _get_data_loaders_from_stream(
tokenizer, max_source_length, max_target_length, verbalizer, task_ids=0
)
mapped_stream = train_stream.map(tokenize_function)
# if requires_unwrapping:
# mapped_stream = mapped_stream.flatten()
# TODO: Deprecate and remove stream wrapper & use trainer
wrapped_stream = SimpleIterableStreamWrapper(mapped_stream, shuffle=shuffle)
dataloader = DataLoader(
wrapped_stream, collate_fn=collate_fn, batch_size=batch_size
Expand Down

0 comments on commit 579db79

Please sign in to comment.