Skip to content

Commit

Permalink
select quant_linear with pack
Browse files — browse the repository at this point in the history
  • Loading branch information
LRL-ModelCloud committed Dec 25, 2024
1 parent d21256c commit c762c14
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions optimum/gptq/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def __init__(
)
self.exllama_version = self.exllama_config["version"]

def select_quant_linear(self, device_map: Union[str, dict]):
def select_quant_linear(self, device_map: Union[str, dict], pack=False):
if is_gptqmodel_available():
self.quant_linear = hf_select_quant_linear(
bits=self.bits,
Expand All @@ -231,6 +231,7 @@ def select_quant_linear(self, device_map: Union[str, dict]):
meta=self.meta,
device_map=device_map,
backend=self.backend,
pack=pack,
)
else:
self.quant_linear = hf_select_quant_linear(
Expand Down Expand Up @@ -301,7 +302,7 @@ def convert_model(self, model: nn.Module, **kwargs):
)
del layers_to_be_replaced[name]

self.select_quant_linear(device_map=kwargs.get("device_map", None))
self.select_quant_linear(device_map=kwargs.get("device_map", None), pack=False)

self._replace_by_quant_layers(model, layers_to_be_replaced)

Expand Down Expand Up @@ -761,7 +762,7 @@ def pack_model(
layers = get_layers(model)
layers = {n: layers[n] for n in quantizers}

self.select_quant_linear(device_map=model.hf_device_map)
self.select_quant_linear(device_map=model.hf_device_map, pack=True)

self._replace_by_quant_layers(model, quantizers)
qlayers = get_layers(model, [self.quant_linear])
Expand Down

0 comments on commit c762c14

Please sign in to comment.