diff --git a/Readme.md b/Readme.md index 38ebc1a..a910fb5 100755 --- a/Readme.md +++ b/Readme.md @@ -5,6 +5,8 @@ This folder contains the code to perform Half-Quadratic Quantization (HQQ HQQ is a fast and accurate model quantizer that skips the need for calibration data. It's super simple to implement (just a few lines of code for the optimizer). It can crunch through quantizing the Llama2-70B model in only 4 minutes! 🚀 ### Installation +First, make sure you have a PyTorch 2 version that matches your CUDA version: https://pytorch.org/ + You can install hqq via ```pip install hqq```. To get the latest version, you can install the core library directly via ```pip install git+https://github.com/mobiusml/hqq.git```. diff --git a/setup.py b/setup.py index a63bd75..1f22332 100755 --- a/setup.py +++ b/setup.py @@ -9,5 +9,5 @@ author_email='hicham@mobiuslabs.com', license='Apache 2', packages=['hqq', 'hqq/core', 'hqq/engine', 'hqq/models', 'hqq/models/hf', 'hqq/models/timm', 'hqq/models/vllm'], - install_requires=['numpy>=1.24.4','tqdm>=4.64.1', 'torch>=2.1.1', 'huggingface_hub', 'accelerate', 'timm', 'transformers>=4.36.1', 'termcolor'], #add vllm/langchain? + install_requires=['numpy>=1.24.4','tqdm>=4.64.1', 'huggingface_hub', 'accelerate', 'timm', 'transformers>=4.36.1', 'termcolor'], #'torch>=2.1.1', add vllm/langchain? )