From 67683da0a55c30bd4ad1521abea2ab481dfac7c8 Mon Sep 17 00:00:00 2001
From: Jake Fennick
Date: Tue, 19 Sep 2023 11:23:24 -0600
Subject: [PATCH] Replace malformed nvidia-smi XML

---
 cwltool/cuda.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/cwltool/cuda.py b/cwltool/cuda.py
index 719bfd8677..568608b25b 100644
--- a/cwltool/cuda.py
+++ b/cwltool/cuda.py
@@ -15,7 +15,14 @@ def cuda_version_and_device_count() -> Tuple[str, int]:
     except Exception as e:
         _logger.warning("Error checking CUDA version with nvidia-smi: %s", e)
         return ("", 0)
-    dm = xml.dom.minidom.parseString(out) # nosec
+
+    # Apparently nvidia-smi is not safe to call concurrently.
+    # With --parallel, sometimes the returned XML will contain
+    # \xff...\xff
+    # and xml.dom.minidom.parseString will raise
+    # "xml.parsers.expat.ExpatError: not well-formed (invalid token)"
+    out_no_xff = out.replace(b'\xff', b'')
+    dm = xml.dom.minidom.parseString(out_no_xff) # nosec

     ag = dm.getElementsByTagName("attached_gpus")
     if len(ag) < 1 or ag[0].firstChild is None: