Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce C-Rust FFI complexity for HTML CSS image extraction logic #1241

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 6 additions & 27 deletions libclamav/htmlnorm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1882,33 +1882,12 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha
}

if (style_buff != NULL) {
// Found contents of <style> ... </style> block

// Create image extractor for style block
css_image_extractor_t extractor = new_css_image_extractor((const char *)style_buff);
if (NULL != extractor) {
uint8_t *image = NULL;
size_t image_len = 0;
css_image_handle_t image_handle = NULL;

// Extract until there are no more images remaining.
while (false != css_image_extract_next(extractor,
(const uint8_t **)&image,
&image_len,
&image_handle)) {
// Scan each extracted image. The magic scan will figure out the file type.
cl_error_t ret = cli_magic_scan_buff(image, image_len, ctx, NULL, LAYER_ATTRIBUTES_NONE);
if (CL_SUCCESS != ret) {
cli_dbgmsg("Scan of image extracted from html <style> block returned: %s\n", cl_strerror(ret));
free_extracted_image(image_handle);
free_css_image_extractor(extractor);
goto done;
}
free_extracted_image(image_handle);
image_handle = NULL;
}

free_css_image_extractor(extractor);
// Found contents of <style> ... </style> block.
// Search it for images embedded in the CSS.
cl_error_t ret = html_style_block_handler(ctx, (const char *)style_buff);
if (CL_SUCCESS != ret) {
cli_dbgmsg("Scan of image extracted from html <style> block returned: %s\n", cl_strerror(ret));
goto done;
}

free(style_buff);
Expand Down
3 changes: 0 additions & 3 deletions libclamav/htmlnorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ typedef struct m_area_tag {
fmap_t *map;
} m_area_t;

typedef void *css_image_extractor_t;
typedef void *css_image_handle_t;

bool html_normalise_mem(cli_ctx *ctx, unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs, const struct cli_dconf *dconf);
bool html_normalise_map(cli_ctx *ctx, fmap_t *map, const char *dirname, tag_arguments_t *hrefs, const struct cli_dconf *dconf);
void html_tag_arg_free(tag_arguments_t *tags);
Expand Down
2 changes: 0 additions & 2 deletions libclamav_rust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ const BINDGEN_TYPES: &[&str] = &[
"cli_matcher",
"cli_ac_data",
"cli_ac_result",
"css_image_extractor_t",
"css_image_handle_t",
"onedump_t",
];

Expand Down
93 changes: 32 additions & 61 deletions libclamav_rust/src/css_image_extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@
* MA 02110-1301, USA.
*/

use std::{ffi::CStr, mem::ManuallyDrop, os::raw::c_char};
use std::{ffi::CStr, os::raw::c_char};

use base64::{engine::general_purpose as base64_engine_standard, Engine as _};
use log::{debug, error, warn};
use unicode_segmentation::UnicodeSegmentation;

use crate::sys;
use crate::{
scanners::magic_scan,
sys::{cl_error_t, cl_error_t_CL_ERROR, cl_error_t_CL_SUCCESS, cli_ctx},
};

/// Error enumerates all possible errors returned by this library.
#[derive(thiserror::Error, Debug)]
Expand Down Expand Up @@ -272,80 +275,48 @@ impl<'a> Iterator for CssImageExtractor<'a> {
}
}

/// C interface for CssImageExtractor::new().
/// Handles all the unsafe ffi stuff.
/// C interface to handle HTML style blocks.
///
/// This function looks through the style block for images found in HTML style blocks.
/// It will extract each image and scan it.
///
/// # Safety
///
/// `file_bytes` and `hash_out` must not be NULL
#[export_name = "new_css_image_extractor"]
pub unsafe extern "C" fn new_css_image_extractor(
/// `file_bytes` must not be NULL
#[export_name = "html_style_block_handler"]
pub unsafe extern "C" fn html_style_block_handler(
ctx: *mut cli_ctx,
file_bytes: *const c_char,
) -> sys::css_image_extractor_t {
) -> cl_error_t {
let css_input = if file_bytes.is_null() {
warn!("{} is NULL", stringify!(file_bytes));
return 0 as sys::css_image_extractor_t;
warn!("html css 'file_bytes' buffer is NULL");
return cl_error_t_CL_ERROR;
} else {
#[allow(unused_unsafe)]
unsafe { CStr::from_ptr(file_bytes) }.to_string_lossy()
};

if let Ok(extractor) = CssImageExtractor::new(&css_input) {
Box::into_raw(Box::new(extractor)) as sys::css_image_extractor_t
let extractor = if let Ok(extractor) = CssImageExtractor::new(&css_input) {
extractor
} else {
0 as sys::css_image_extractor_t
}
}
return cl_error_t_CL_ERROR;
};

/// Free the css image extractor
#[no_mangle]
pub extern "C" fn free_css_image_extractor(extractor: sys::css_image_extractor_t) {
if extractor.is_null() {
warn!("Attempted to free an image extractor pointer. Please report this at: https://github.com/Cisco-Talos/clamav/issues");
} else {
let _ = unsafe { Box::from_raw(extractor as *mut CssImageExtractor) };
}
}
let mut scan_result = cl_error_t_CL_SUCCESS;

/// C interface for CssImageExtractor::next().
/// Handles all the unsafe ffi stuff.
///
/// # Safety
///
/// `file_bytes` and `hash_out` must not be NULL
#[export_name = "css_image_extract_next"]
pub unsafe extern "C" fn css_image_extract_next(
extractor: sys::css_image_extractor_t,
image_out: *mut *const u8,
image_out_len: *mut usize,
image_out_handle: *mut sys::css_image_handle_t,
) -> bool {
let mut extractor = ManuallyDrop::new(Box::from_raw(extractor as *mut CssImageExtractor));

let image_result = extractor.next();
match image_result {
Some(image) => {
*image_out = image.as_ptr();
*image_out_len = image.len();
*image_out_handle = Box::into_raw(Box::new(image)) as sys::css_image_handle_t;
true
extractor.into_iter().all(|image| {
debug!("Extracted {}-byte image", image.len());

let ret = magic_scan(ctx, &image, None);
if ret != cl_error_t_CL_SUCCESS {
scan_result = ret;
return false;
}
None => false,
}
}

/// Free an extracted image
///
/// # Safety
///
/// Only call this once you're done using the image_out bytes.
#[no_mangle]
pub extern "C" fn free_extracted_image(image: sys::css_image_handle_t) {
if image.is_null() {
warn!("Attempted to free an image pointer. Please report this at: https://github.com/Cisco-Talos/clamav/issues");
} else {
let _ = unsafe { Box::from_raw(image as *mut Vec<u8>) };
}
true
});

scan_result
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion libclamav_rust/src/scanners.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use crate::{

/// Rust wrapper of libclamav's cli_magic_scan_buff() function.
/// Use magic sigs to identify the file type and then scan it.
fn magic_scan(ctx: *mut cli_ctx, buf: &[u8], name: Option<String>) -> cl_error_t {
pub fn magic_scan(ctx: *mut cli_ctx, buf: &[u8], name: Option<String>) -> cl_error_t {
let ptr = buf.as_ptr();
let len = buf.len();

Expand Down
Loading