Skip to content

Commit

Permalink
Add chunk example, use extend
Browse files Browse the repository at this point in the history
Signed-off-by: Alex-Brooks <[email protected]>
  • Loading branch information
alex-jw-brooks committed Oct 2, 2023
1 parent ccd9c13 commit 52f9910
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,14 @@ def _causal_lm_as_chunked(
# Force everything to a list of batch encodings; for non-batch mode, this just
# puts it into a list. For batch mode, we get a list of batch encodings,
# allowing us to standardize subsequent processing a bit.
#
# For example, given chunk size 2, we might have something like:
# [
# {'input_ids': [31, 48], 'attention_mask': [1, 1]},
# {'input_ids': [47, 1], 'attention_mask': [1, 1]},
# ...
# ]
# (where the above objects are batch encodings, which are dict-like: BatchEncoding
# subclasses collections.UserDict, so isinstance(x, dict) is False)
source_id_chunks = cls._force_to_batch_encoding_list_of_chunks(
source_ids, target_ids, batched_mode, task_ids, chunk_size, drop_remainder
)
Expand Down Expand Up @@ -330,7 +338,7 @@ def _concatenate_encodings(left: BatchEncoding, right: BatchEncoding) -> None:
encoding. Corresponds to target.
"""
for k in left.keys():
- left[k] = left[k] + right[k]
+ left[k].extend(right[k])

@staticmethod
def _split_encoding_into_chunks(
Expand Down

0 comments on commit 52f9910

Please sign in to comment.