
add deepcopy and copy for Param4bit #1060

Merged
29 changes: 29 additions & 0 deletions bitsandbytes/nn/modules.py
@@ -2,6 +2,7 @@
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import copy
from typing import Any, Dict, Optional, TypeVar, Union, overload
import warnings

@@ -213,6 +214,34 @@ def __new__(
        self.data = data
        self.module = module
        return self

    def __getstate__(self):
        state = self.__dict__
        state["data"] = self.data
        state["requires_grad"] = self.requires_grad
        return state

    def __setstate__(self, state):
        self.requires_grad = state["requires_grad"]
        self.blocksize = state["blocksize"]
        self.compress_statistics = state["compress_statistics"]
        self.quant_type = state["quant_type"]
        self.quant_state = state["quant_state"]
        self.data = state["data"]

    def __deepcopy__(self, memo):
        new_instance = type(self).__new__(type(self))
        state = self.__getstate__()
        new_instance.__setstate__(state)
        new_instance.quant_state = copy.deepcopy(state["quant_state"])
        new_instance.data = copy.deepcopy(state["data"])
        return new_instance

    def __copy__(self):
        new_instance = type(self).__new__(type(self))
        state = self.__getstate__()
        new_instance.__setstate__(state)
        return new_instance
Contributor
@akx akx Feb 15, 2024

Is having to do this dance common in Torch world? 🤔

I'm a little worried that someone adding a new field in __init__ will inevitably miss adding them here...

Contributor Author
@SunMarc SunMarc Feb 15, 2024

Is having to do this dance common in Torch world? 🤔

I don't think so, but I wasn't able to find a better solution. I based my solution on this specific code from torch.

I'm a little worried that someone adding a new field in __init__ will inevitably miss adding them here...

Yeah, that's true :/. I tried modifying __setstate__ so that we update state.__dict__ using self.__dict__, but some attributes were not copied properly.

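For reference, the torch code SunMarc mentions basing this on is most likely the __deepcopy__ defined on torch.nn.Parameter. A rough sketch from memory, not part of this diff, and the upstream implementation may differ in detail:

    def __deepcopy__(self, memo):
        # Reuse the already-copied instance if this parameter was seen earlier in the same deepcopy pass.
        if id(self) in memo:
            return memo[id(self)]
        # Otherwise clone the underlying tensor and register the new parameter in the memo dict.
        result = type(self)(self.data.clone(memory_format=torch.preserve_format), self.requires_grad)
        memo[id(self)] = result
        return result

Params4bit cannot reuse that pattern as-is, because rebuilding the parameter also has to carry over blocksize, compress_statistics, quant_type and quant_state, which is what the explicit __getstate__/__setstate__ pair above restores field by field.
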

    @classmethod
    def from_prequantized(cls, data: torch.Tensor, quantized_stats: Dict[str, Any], requires_grad: bool = False, device='cuda', **kwargs) -> "Params4bit":
2 changes: 1 addition & 1 deletion tests/test_functional.py
@@ -9,8 +9,8 @@
from scipy.stats import norm
import torch

import bitsandbytes as bnb
from bitsandbytes import functional as F
import bitsandbytes as bnb
from tests.helpers import (
BOOLEAN_TUPLES,
TRUE_FALSE,
16 changes: 16 additions & 0 deletions tests/test_linear4bit.py
@@ -1,3 +1,4 @@
import copy
import os
from tempfile import TemporaryDirectory

@@ -146,3 +147,18 @@ def test_linear_serialization(quant_type, compress_statistics, bias, quant_stora
    target_compression = 0.143 if original_dtype == torch.float32 else 0.29 # these numbers get lower as weight shape increases
    ratio_error_msg = f"quantized_size {size_4:,} is larger on disk than {target_compression:.2%} of original size {size_orig:,}"
    assert size_ratio < target_compression, ratio_error_msg

def test_copy_param():
    tensor = torch.tensor([1.0, 2.0, 3.0, 4.0])
    param = bnb.nn.Params4bit(data=tensor, requires_grad=False).cuda(0)

    shallow_copy_param = copy.copy(param)
    assert param.quant_state is shallow_copy_param.quant_state
    assert param.data.data_ptr() == shallow_copy_param.data.data_ptr()

def test_deepcopy_param():
    tensor = torch.tensor([1.0, 2.0, 3.0, 4.0])
    param = bnb.nn.Params4bit(data=tensor, requires_grad=False).cuda(0)
    copy_param = copy.deepcopy(param)
    assert param.quant_state is not copy_param.quant_state
    assert param.data.data_ptr() != copy_param.data.data_ptr()
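
Beyond these parameter-level tests, the practical scenario the new hooks unlock is copying a whole quantized layer. A minimal sketch, not part of the PR, assuming a CUDA device is available and using the public bnb.nn.Linear4bit layer (the 64x64 shape is arbitrary):

import copy

import bitsandbytes as bnb

# Moving the layer to the GPU quantizes its weight into a Params4bit with a populated quant_state.
layer = bnb.nn.Linear4bit(64, 64, quant_type="nf4")
layer.cuda()

# deepcopy of the module deep-copies its parameters, which now goes through Params4bit.__deepcopy__,
# so the copy owns its own packed weight data and its own quant_state.
layer_copy = copy.deepcopy(layer)
assert layer_copy.weight.quant_state is not layer.weight.quant_state
assert layer_copy.weight.data.data_ptr() != layer.weight.data.data_ptr()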
2 changes: 1 addition & 1 deletion tests/test_linear8bitlt.py
@@ -5,8 +5,8 @@
import pytest
import torch

import bitsandbytes as bnb
from bitsandbytes import functional as F
import bitsandbytes as bnb
from bitsandbytes.autograd import get_inverse_transform_indices, undo_layout
from bitsandbytes.nn.modules import Linear8bitLt
from tests.helpers import TRUE_FALSE, id_formatter