Skip to content

Commit

Permalink
Generate: assistant should sample when the main model samples (huggin…
Browse files: browse the repository at this point in the history
  • Loading branch information
gante authored Sep 20, 2024
1 parent dc8b6ea commit 77c5d59
Showing 1 changed file with 0 additions and 6 deletions.
6 changes: 0 additions & 6 deletions src/transformers/generation/candidate_generator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -160,12 +160,6 @@ def __init__(
self.generation_config.output_scores = True
self.generation_config.assistant_confidence_threshold = self.assistant_confidence_threshold

# Disable sampling -- this implementation of assisted generation/speculative decoding uses the assistant
# greedily to maximize matches. Disables sampling-related flags to prevent warnings
self.generation_config.do_sample = False
for attr in ("temperature", "top_p", "min_p", "typical_p", "top_k", "epsilon_cutoff", "eta_cutoff"):
setattr(self.generation_config, attr, None)

# avoid unnecessary warnings that min_length is larger than max_new_tokens
# remove the `MinLengthLogitsProcessor` if exists (NOTE: no need to check for `MinNewTokensLogitsProcessor`)
self.main_model_min_length = self.generation_config.min_length
Expand Down

0 comments on commit 77c5d59

Please sign in to comment.