unslothai · Sehyo · Aug 30, 2024 · Aug 30, 2024 · Aug 30, 2024 · Datta0
diff --git a/unsloth/tokenizer_utils.py b/unsloth/tokenizer_utils.py
@@ -1091,12 +1091,20 @@ def add_new_tokens(
 
 
 def check_nvidia():
+    index_for_cuda = -1
+    if "CUDA_VISIBLE_DEVICES" in os.environ:
+        index_for_cuda = os.environ["CUDA_VISIBLE_DEVICES"]
-    index_for_cuda = -1
-    if "CUDA_VISIBLE_DEVICES" in os.environ:
-        index_for_cuda = os.environ["CUDA_VISIBLE_DEVICES"]
+    index_for_cuda = os.environ.get("CUDA_VISIBLE_DEVICES", -1)
-    index_for_cuda = -1
-    if "CUDA_VISIBLE_DEVICES" in os.environ:
-        index_for_cuda = os.environ["CUDA_VISIBLE_DEVICES"]
+    index_for_cuda = os.environ.get("CUDA_VISIBLE_DEVICES", -1)
+        if "," in index_for_cuda:
+            raise RuntimeError("Unsloth currently does not support multi GPU setups - but we are working on it!")
+        index_for_cuda = int(index_for_cuda)
     # Unsloth doesn't work yet on AMD devices - we're working on it!
     output = np.array([0,])
     try:
         output = subprocess.check_output("nvidia-smi --query-gpu=memory.used --format=csv", shell = True)
         output = re.findall(rb'([\d]{1,})[\s]{1,}M', output)
         output = np.array([int(x.decode('utf-8'))/1024 for x in output])
+        if index_for_cuda != -1:
+            output = np.array([output[index_for_cuda],])
     except:
         if not torch.cuda.is_available():
             raise RuntimeError("Unsloth: We do not support AMD / Intel machines yet - it is a work in progress!")
@@ -1160,11 +1168,20 @@ def patch_sft_trainer_tokenizer():
         "    )\n"\
         "pass\n"\
         "import subprocess, re, gc, numpy as np\n"\
+        "import os\n"\
+        "index_for_cuda = -1\n"\
+        "if \"CUDA_VISIBLE_DEVICES\" in os.environ:\n"\
+        "    index_for_cuda = os.environ[\"CUDA_VISIBLE_DEVICES\"]\n"\
+        "    if \",\" in index_for_cuda:\n"\
+        "        raise RuntimeError(\"Unsloth currently does not support multi GPU setups - but we are working on it!\")\n"\
+        "    index_for_cuda = int(index_for_cuda)\n"\
         "a = np.array([0,])\n"\
         "try:\n"\
         "    a = subprocess.check_output('nvidia-smi --query-gpu=memory.used --format=csv', shell = True)\n"\
         "    a = re.findall(rb'([\\d]{1,})[\\s]{1,}M', a)\n"\
         "    a = np.array([int(x.decode('utf-8'))/1024 for x in a])\n"\
+        "    if index_for_cuda != -1:\n"\
+        "        a = np.array([a[index_for_cuda],])\n"\
         "except:\n"\
         "    if not torch.cuda.is_available():\n"\
         "        raise RuntimeError('Unsloth: We do not support AMD / Intel machines yet - it is a work in progress!')\n"\