Skip to content

Commit

Permalink
Reduce C-Rust FFI complexity for HTML CSS image extraction logic
Browse files Browse the repository at this point in the history
The C-Rust FFI code is needlessly complex. Now that we are calling into
magic_scan from Rust, we can simply hand off the <style> block contents
to Rust code to handle extraction and scanning.
  • Loading branch information
micahsnyder committed Apr 14, 2024
1 parent a844e81 commit af4270b
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 94 deletions.
33 changes: 6 additions & 27 deletions libclamav/htmlnorm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1882,33 +1882,12 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha
}

if (style_buff != NULL) {
// Found contents of <style> ... </style> block

// Create image extractor for style block
css_image_extractor_t extractor = new_css_image_extractor((const char *)style_buff);
if (NULL != extractor) {
uint8_t *image = NULL;
size_t image_len = 0;
css_image_handle_t image_handle = NULL;

// Extract until there are no more images remaining.
while (false != css_image_extract_next(extractor,
(const uint8_t **)&image,
&image_len,
&image_handle)) {
// Scan each extracted image. The magic scan will figure out the file type.
cl_error_t ret = cli_magic_scan_buff(image, image_len, ctx, NULL, LAYER_ATTRIBUTES_NONE);
if (CL_SUCCESS != ret) {
cli_dbgmsg("Scan of image extracted from html <style> block returned: %s\n", cl_strerror(ret));
free_extracted_image(image_handle);
free_css_image_extractor(extractor);
goto done;
}
free_extracted_image(image_handle);
image_handle = NULL;
}

free_css_image_extractor(extractor);
// Found contents of <style> ... </style> block.
// Search it for images embedded in the CSS.
cl_error_t ret = html_style_block_handler(ctx, (const char *)style_buff);
if (CL_SUCCESS != ret) {
cli_dbgmsg("Scan of image extracted from html <style> block returned: %s\n", cl_strerror(ret));
goto done;
}

free(style_buff);
Expand Down
3 changes: 0 additions & 3 deletions libclamav/htmlnorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ typedef struct m_area_tag {
fmap_t *map;
} m_area_t;

typedef void *css_image_extractor_t;
typedef void *css_image_handle_t;

bool html_normalise_mem(cli_ctx *ctx, unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs, const struct cli_dconf *dconf);
bool html_normalise_map(cli_ctx *ctx, fmap_t *map, const char *dirname, tag_arguments_t *hrefs, const struct cli_dconf *dconf);
void html_tag_arg_free(tag_arguments_t *tags);
Expand Down
2 changes: 0 additions & 2 deletions libclamav_rust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ const BINDGEN_TYPES: &[&str] = &[
"cli_matcher",
"cli_ac_data",
"cli_ac_result",
"css_image_extractor_t",
"css_image_handle_t",
"onedump_t",
];

Expand Down
93 changes: 32 additions & 61 deletions libclamav_rust/src/css_image_extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@
* MA 02110-1301, USA.
*/

use std::{ffi::CStr, mem::ManuallyDrop, os::raw::c_char};
use std::{ffi::CStr, os::raw::c_char};

use base64::{engine::general_purpose as base64_engine_standard, Engine as _};
use log::{debug, error, warn};
use unicode_segmentation::UnicodeSegmentation;

use crate::sys;
use crate::{
scanners::magic_scan,
sys::{cl_error_t, cl_error_t_CL_ERROR, cl_error_t_CL_SUCCESS, cli_ctx},
};

/// Error enumerates all possible errors returned by this library.
#[derive(thiserror::Error, Debug)]
Expand Down Expand Up @@ -272,80 +275,48 @@ impl<'a> Iterator for CssImageExtractor<'a> {
}
}

/// C interface for CssImageExtractor::new().
/// Handles all the unsafe ffi stuff.
/// C interface to handle HTML style blocks.
///
/// This function looks through the style block for images found in HTML style blocks.
/// It will extract each image and scan it.
///
/// # Safety
///
/// `file_bytes` and `hash_out` must not be NULL
#[export_name = "new_css_image_extractor"]
pub unsafe extern "C" fn new_css_image_extractor(
/// `file_bytes` must not be NULL
#[export_name = "html_style_block_handler"]
pub unsafe extern "C" fn html_style_block_handler(
ctx: *mut cli_ctx,
file_bytes: *const c_char,
) -> sys::css_image_extractor_t {
) -> cl_error_t {
let css_input = if file_bytes.is_null() {
warn!("{} is NULL", stringify!(file_bytes));
return 0 as sys::css_image_extractor_t;
warn!("html css 'file_bytes' buffer is NULL");
return cl_error_t_CL_ERROR;
} else {
#[allow(unused_unsafe)]
unsafe { CStr::from_ptr(file_bytes) }.to_string_lossy()
};

if let Ok(extractor) = CssImageExtractor::new(&css_input) {
Box::into_raw(Box::new(extractor)) as sys::css_image_extractor_t
let extractor = if let Ok(extractor) = CssImageExtractor::new(&css_input) {
extractor
} else {
0 as sys::css_image_extractor_t
}
}
return cl_error_t_CL_ERROR;
};

/// Free the css image extractor
#[no_mangle]
pub extern "C" fn free_css_image_extractor(extractor: sys::css_image_extractor_t) {
if extractor.is_null() {
warn!("Attempted to free an image extractor pointer. Please report this at: https://github.com/Cisco-Talos/clamav/issues");
} else {
let _ = unsafe { Box::from_raw(extractor as *mut CssImageExtractor) };
}
}
let mut scan_result = cl_error_t_CL_SUCCESS;

/// C interface for CssImageExtractor::next().
/// Handles all the unsafe ffi stuff.
///
/// # Safety
///
/// `file_bytes` and `hash_out` must not be NULL
#[export_name = "css_image_extract_next"]
pub unsafe extern "C" fn css_image_extract_next(
extractor: sys::css_image_extractor_t,
image_out: *mut *const u8,
image_out_len: *mut usize,
image_out_handle: *mut sys::css_image_handle_t,
) -> bool {
let mut extractor = ManuallyDrop::new(Box::from_raw(extractor as *mut CssImageExtractor));

let image_result = extractor.next();
match image_result {
Some(image) => {
*image_out = image.as_ptr();
*image_out_len = image.len();
*image_out_handle = Box::into_raw(Box::new(image)) as sys::css_image_handle_t;
true
extractor.into_iter().all(|image| {
debug!("Extracted {}-byte image", image.len());

let ret = magic_scan(ctx, &image, None);
if ret != cl_error_t_CL_SUCCESS {
scan_result = ret;
return false;
}
None => false,
}
}

/// Free an extracted image
///
/// # Safety
///
/// Only call this once you're done using the image_out bytes.
#[no_mangle]
pub extern "C" fn free_extracted_image(image: sys::css_image_handle_t) {
if image.is_null() {
warn!("Attempted to free an image pointer. Please report this at: https://github.com/Cisco-Talos/clamav/issues");
} else {
let _ = unsafe { Box::from_raw(image as *mut Vec<u8>) };
}
true
});

scan_result
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion libclamav_rust/src/scanners.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use crate::{

/// Rust wrapper of libclamav's cli_magic_scan_buff() function.
/// Use magic sigs to identify the file type and then scan it.
fn magic_scan(ctx: *mut cli_ctx, buf: &[u8], name: Option<String>) -> cl_error_t {
pub fn magic_scan(ctx: *mut cli_ctx, buf: &[u8], name: Option<String>) -> cl_error_t {
let ptr = buf.as_ptr();
let len = buf.len();

Expand Down

0 comments on commit af4270b

Please sign in to comment.