Skip to content

Commit

Permalink
load quantized model with attention kwargs
Browse files: browse the repository at this point in the history
  • Loading branch information
mobicham committed May 6, 2024
1 parent 108cdf6 commit 474f09b
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions hqq/models/hf/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,8 +13,15 @@ def cache_model(cls, model, save_dir):

# Create empty model from config
@classmethod
def create_model(cls, save_dir):
config = transformers.AutoConfig.from_pretrained(cls.get_config_file(save_dir))
def create_model(cls, save_dir, kwargs):
config_kwargs = {}
for key in ["attn_implementation"]:
if key in kwargs:
config_kwargs[key] = kwargs[key]

config = transformers.AutoConfig.from_pretrained(
cls.get_config_file(save_dir), **config_kwargs
)

auto_class = transformers.AutoModel

Expand Down

0 comments on commit 474f09b

Please sign in to comment.