Commit
Apply suggestions from code review
Co-authored-by: Arthur <[email protected]>
SunMarc and ArthurZucker authored Dec 13, 2023
1 parent 6a517d8 commit 69d7f1d
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/transformers/utils/quantization_config.py
@@ -381,9 +381,9 @@ class GPTQConfig(QuantizationConfigMixin):
      cache_block_outputs (`bool`, *optional*, defaults to `True`):
          Whether to cache block outputs to reuse as inputs for the succeeding block.
      modules_in_block_to_quantize (`List[List[str]]`, *optional*):
-         List list of module names to quantize in the block specified. This argument is useful to exclude certain linear modules from being quantized.
+         List of list of module names to quantize in the specified block. This argument is useful to exclude certain linear modules from being quantized.
          The block to quantize can be specified by setting `block_name_to_quantize`. We will quantize each list sequentially. If not set, we will quantize all linear layers.
-         Example: `inside_layer_modules=[["self_attention.query_key_value"], ["mlp.dense_h_to_4h"]]`
+         Example: `modules_in_block_to_quantize =[["self_attention.query_key_value"], ["mlp.dense_h_to_4h"]]`
      """

def __init__(
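
For reference, a minimal sketch (not part of this commit) of how the documented `modules_in_block_to_quantize` argument might be passed when quantizing a BLOOM-style model with GPTQ. The checkpoint name and the other `GPTQConfig` arguments below are illustrative assumptions, not taken from the diff; the module names mirror the docstring's own example.

    # Illustrative sketch only: restrict GPTQ quantization to specific linear
    # modules inside each Transformer block. Checkpoint choice and the other
    # GPTQConfig arguments are assumptions, not part of this commit.
    from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

    model_id = "bigscience/bloom-560m"  # hypothetical model whose blocks contain the modules named below
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    quantization_config = GPTQConfig(
        bits=4,
        dataset="c4",        # calibration dataset
        tokenizer=tokenizer,
        # Quantize only these linear modules in each block, one inner list at a
        # time; per the docstring, other linear layers are left unquantized.
        modules_in_block_to_quantize=[["self_attention.query_key_value"], ["mlp.dense_h_to_4h"]],
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quantization_config,
    )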
