Fix warning when scanning some HTML files

HTML files with <style> blocks containing non-UTF-8 byte sequences cause warnings when they are processed to extract base64-encoded images. To resolve this, use the to_string_lossy() method, which borrows the content when it is valid UTF-8 and otherwise allocates a sanitized copy in which each invalid sequence is replaced with U+FFFD. Resolves: #1082
Cisco-Talos · Nov 13, 2023 · 6e14400 · 6e14400
1 parent 47c079f
commit 6e14400
Showing 1 changed file with 2 additions and 8 deletions.
diff --git a/libclamav_rust/src/css_image_extract.rs b/libclamav_rust/src/css_image_extract.rs
@@ -288,16 +288,10 @@ pub unsafe extern "C" fn new_css_image_extractor(
         return 0 as sys::css_image_extractor_t;
     } else {
         #[allow(unused_unsafe)]
-        match unsafe { CStr::from_ptr(file_bytes) }.to_str() {
-            Err(e) => {
-                warn!("{} is not valid unicode: {}", stringify!(file_bytes), e);
-                return 0 as sys::css_image_extractor_t;
-            }
-            Ok(s) => s,
-        }
+        unsafe { CStr::from_ptr(file_bytes) }.to_string_lossy()
     };
 
-    if let Ok(extractor) = CssImageExtractor::new(css_input) {
+    if let Ok(extractor) = CssImageExtractor::new(&css_input) {
         Box::into_raw(Box::new(extractor)) as sys::css_image_extractor_t
     } else {
         0 as sys::css_image_extractor_t