diff --git a/Cargo.lock b/Cargo.lock index ceb99f0865..6450f43c9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + [[package]] name = "aho-corasick" version = "1.1.2" @@ -97,6 +103,27 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cbindgen" version = "0.25.0" @@ -115,6 +142,15 @@ dependencies = [ "toml", ] +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + [[package]] name = "cexpr" version = "0.6.0" @@ -136,11 +172,14 @@ version = "0.0.1" dependencies = [ "base64", "bindgen", + "byteorder", + "bzip2", "cbindgen", "flate2", "hex", "hex-literal", "image", + "inflate", "libc", "log", "num-traits", @@ -430,6 +469,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "inflate" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1cdb29978cc5797bd8dcc8e5bf7de604891df2a8dc576973d71a281e916db2ff" +dependencies = [ + "adler32", +] + [[package]] name = "itertools" version = "0.10.5" @@ -624,6 +672,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + [[package]] name = "png" version = "0.17.10" diff --git a/clamd/server-th.c b/clamd/server-th.c index 5de65204df..eacf334beb 100644 --- a/clamd/server-th.c +++ b/clamd/server-th.c @@ -1298,6 +1298,13 @@ int recvloop(int *socketds, unsigned nsockets, struct cl_engine *engine, unsigne logg(LOGG_INFO, "OneNote support disabled.\n"); } + if (optget(opts, "ScanAlz")->enabled) { + logg(LOGG_INFO, "Alz support enabled.\n"); + options.parse |= CL_SCAN_PARSE_ALZ; + } else { + logg(LOGG_INFO, "Alz support disabled.\n"); + } + if (optget(opts, "PhishingScanURLs")->enabled) { /* TODO: Remove deprecated option in a future feature release */ if ((optget(opts, "PhishingAlwaysBlockCloak")->enabled) || diff --git a/clamscan/clamscan.c b/clamscan/clamscan.c index bf391cff4f..a220f11496 100644 --- a/clamscan/clamscan.c +++ b/clamscan/clamscan.c @@ -305,6 +305,7 @@ void help(void) mprintf(LOGG_INFO, " --scan-xmldocs[=yes(*)/no] Scan xml-based document files\n"); mprintf(LOGG_INFO, " --scan-hwp3[=yes(*)/no] Scan HWP3 files\n"); mprintf(LOGG_INFO, " --scan-onenote[=yes(*)/no] Scan OneNote files\n"); + mprintf(LOGG_INFO, " --scan-alz[=yes(*)/no] Scan alz files\n"); mprintf(LOGG_INFO, " --scan-archive[=yes(*)/no] Scan archive files (supported by libclamav)\n"); mprintf(LOGG_INFO, " --alert-broken[=yes/no(*)] Alert on broken executable files (PE & ELF)\n"); mprintf(LOGG_INFO, " --alert-broken-media[=yes/no(*)] Alert on broken graphics files (JPEG, TIFF, PNG, 
GIF)\n"); diff --git a/clamscan/manager.c b/clamscan/manager.c index eec5cf1985..1280651cef 100644 --- a/clamscan/manager.c +++ b/clamscan/manager.c @@ -1555,6 +1555,11 @@ int scanmanager(const struct optstruct *opts) if (optget(opts, "scan-onenote")->enabled) options.parse |= CL_SCAN_PARSE_ONENOTE; + if (optget(opts, "scan-alz")->enabled) { + /*TODO: Consider just having this for archives.*/ + options.parse |= CL_SCAN_PARSE_ALZ; + } + /* TODO: Remove deprecated option in a future feature release */ if ((optget(opts, "algorithmic-detection")->enabled) && /* && used due to default-yes for both options */ (optget(opts, "heuristic-alerts")->enabled)) { diff --git a/common/optparser.c b/common/optparser.c index 8a4197720a..e471a4ecc7 100644 --- a/common/optparser.c +++ b/common/optparser.c @@ -436,6 +436,8 @@ const struct clam_option __clam_options[] = { {"ScanOneNote", "scan-onenote", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option enables scanning OneNote files.\nIf you turn off this option, the original files will still be scanned, but\nwithout additional processing.", "yes"}, + {"ScanAlz", "scan-alz", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option enables scanning Alz files.\nIf you turn off this option, the original files will still be scanned, but\nwithout additional processing.", "yes"}, + {"ScanArchive", "scan-archive", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Scan within archives and compressed files.\nIf you turn off this option, the original files will still be scanned, but\nwithout unpacking and additional processing.", "yes"}, {"ForceToDisk", "force-to-disk", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option causes memory or nested map scans to dump the content to disk.\nIf you turn on this option, more data is written to disk and is available\nwhen the leave-temps option is enabled at the cost of more disk writes.", "no"}, 
diff --git a/libclamav/clamav.h b/libclamav/clamav.h index 3dd6666969..710e297cd3 100644 --- a/libclamav/clamav.h +++ b/libclamav/clamav.h @@ -181,6 +181,7 @@ struct cl_scan_options { #define CL_SCAN_PARSE_HTML 0x100 #define CL_SCAN_PARSE_PE 0x200 #define CL_SCAN_PARSE_ONENOTE 0x400 +#define CL_SCAN_PARSE_ALZ 0x800 /* heuristic alerting options */ #define CL_SCAN_HEURISTIC_BROKEN 0x2 /* alert on broken PE and broken ELF files */ diff --git a/libclamav/dconf.c b/libclamav/dconf.c index 2b0a9952fb..bd64c0d06e 100644 --- a/libclamav/dconf.c +++ b/libclamav/dconf.c @@ -107,6 +107,7 @@ static struct dconf_module modules[] = { {"ARCHIVE", "APM", ARCH_CONF_APM, 1}, {"ARCHIVE", "EGG", ARCH_CONF_EGG, 1}, {"ARCHIVE", "UDF", ARCH_CONF_UDF, 1}, + {"ARCHIVE", "ALZ", ARCH_CONF_ALZ, 1}, {"DOCUMENT", "HTML", DOC_CONF_HTML, 1}, {"DOCUMENT", "RTF", DOC_CONF_RTF, 1}, diff --git a/libclamav/dconf.h b/libclamav/dconf.h index 038ee0e504..7f1139a3bf 100644 --- a/libclamav/dconf.h +++ b/libclamav/dconf.h @@ -97,6 +97,7 @@ struct cli_dconf { #define ARCH_CONF_APM 0x2000000 #define ARCH_CONF_EGG 0x4000000 #define ARCH_CONF_UDF 0x8000000 +#define ARCH_CONF_ALZ 0x10000000 /* Document flags */ #define DOC_CONF_HTML 0x1 diff --git a/libclamav/filetypes.c b/libclamav/filetypes.c index 8cd4dcb1f2..693fdb9383 100644 --- a/libclamav/filetypes.c +++ b/libclamav/filetypes.c @@ -138,6 +138,7 @@ static const struct ftmap_s { { "CL_TYPE_EGG", CL_TYPE_EGG }, { "CL_TYPE_EGGSFX", CL_TYPE_EGGSFX }, { "CL_TYPE_UDF", CL_TYPE_UDF }, + { "CL_TYPE_ALZ", CL_TYPE_ALZ }, { "CL_TYPE_ONENOTE", CL_TYPE_ONENOTE }, { "CL_TYPE_PYTHON_COMPILED", CL_TYPE_PYTHON_COMPILED }, { NULL, CL_TYPE_IGNORED } diff --git a/libclamav/filetypes.h b/libclamav/filetypes.h index 9a553d92ce..9db0e645e1 100644 --- a/libclamav/filetypes.h +++ b/libclamav/filetypes.h @@ -125,6 +125,7 @@ typedef enum cli_file { CL_TYPE_MHTML, CL_TYPE_LNK, CL_TYPE_UDF, + CL_TYPE_ALZ, CL_TYPE_OTHER, /* on-the-fly, used for target 14 (OTHER) */ CL_TYPE_IGNORED /* 
please don't add anything below */ } cli_file_t; diff --git a/libclamav/filetypes_int.h b/libclamav/filetypes_int.h index 0edbd04339..e1572a4c4c 100644 --- a/libclamav/filetypes_int.h +++ b/libclamav/filetypes_int.h @@ -298,5 +298,6 @@ static const char *ftypes_int[] = { "0:0:00010d0a:PyPy 3.8 byte-compiled (.pyc):CL_TYPE_ANY:CL_TYPE_PYTHON_COMPILED:200", "0:0:50010d0a:PyPy 3.9 byte-compiled (.pyc):CL_TYPE_ANY:CL_TYPE_PYTHON_COMPILED:200", "1:0:??0d0d0a:Python 3.7 or newer byte-compiled (.pyc):CL_TYPE_ANY:CL_TYPE_PYTHON_COMPILED:200", + "0:0:414c5a01:ALZ:CL_TYPE_ANY:CL_TYPE_ALZ:210", NULL}; #endif diff --git a/libclamav/others.h b/libclamav/others.h index 670189af88..8ccfddc464 100644 --- a/libclamav/others.h +++ b/libclamav/others.h @@ -570,6 +570,7 @@ extern LIBCLAMAV_EXPORT int have_rar; #define SCAN_PARSE_HTML (ctx->options->parse & CL_SCAN_PARSE_HTML) #define SCAN_PARSE_PE (ctx->options->parse & CL_SCAN_PARSE_PE) #define SCAN_PARSE_ONENOTE (ctx->options->parse & CL_SCAN_PARSE_ONENOTE) +#define SCAN_PARSE_ALZ (ctx->options->parse & CL_SCAN_PARSE_ALZ) #define SCAN_HEURISTIC_BROKEN (ctx->options->heuristic & CL_SCAN_HEURISTIC_BROKEN) #define SCAN_HEURISTIC_BROKEN_MEDIA (ctx->options->heuristic & CL_SCAN_HEURISTIC_BROKEN_MEDIA) diff --git a/libclamav/scanners.c b/libclamav/scanners.c index 45871b8cde..7ab2f035ae 100644 --- a/libclamav/scanners.c +++ b/libclamav/scanners.c @@ -4586,6 +4586,11 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) if (SCAN_PARSE_ONENOTE && (DCONF_ARCH & DOC_CONF_ONENOTE)) ret = scan_onenote(ctx); break; + case CL_TYPE_ALZ: + if (SCAN_PARSE_ALZ && (DCONF_ARCH & ARCH_CONF_ALZ)) { + ret = extract_alz(ctx); + } + break; case CL_TYPE_OOXML_WORD: case CL_TYPE_OOXML_PPT: diff --git a/libclamav_rust/Cargo.toml b/libclamav_rust/Cargo.toml index 26a3a22b3a..c864e14062 100644 --- a/libclamav_rust/Cargo.toml +++ b/libclamav_rust/Cargo.toml @@ -22,6 +22,9 @@ unicode-segmentation = "1.10.1" bindgen = "0.65" onenote_parser = { git = 
"https://github.com/Cisco-Talos/onenote.rs.git", branch = "CLAM-2329-new-from-slice" } hex-literal = "0.4.1" +inflate = "0.4.5" +bzip2 = "0.4.4" +byteorder = "1.5.0" [lib] crate-type = ["staticlib"] diff --git a/libclamav_rust/cbindgen.toml b/libclamav_rust/cbindgen.toml index 28e146e358..1886a1b5b9 100644 --- a/libclamav_rust/cbindgen.toml +++ b/libclamav_rust/cbindgen.toml @@ -37,6 +37,7 @@ include = [ "evidence::evidence_add_indicator", "evidence::IndicatorType", "scanners::scan_onenote", + "scanners::extract_alz", ] # prefix = "CAPI_" diff --git a/libclamav_rust/src/alz.rs b/libclamav_rust/src/alz.rs new file mode 100644 index 0000000000..7f51c36ca0 --- /dev/null +++ b/libclamav_rust/src/alz.rs @@ -0,0 +1,559 @@ +/* + #![warn( + clippy::all, + clippy::restriction, + clippy::pedantic, + clippy::nursery, + clippy::cargo, + )] + */ + +use inflate::InflateStream; +use std::io::Cursor; +use std::io::Read; +use std::error; +use std::fmt; +use byteorder::{LittleEndian, ReadBytesExt}; +use bzip2::read::BzDecoder; +use log::{info}; + +/// File header +const ALZ_FILE_HEADER: u32 = 0x015a_4c41; +///Local file header +const ALZ_LOCAL_FILE_HEADER: u32 = 0x015a_4c42; +///Central directory header +const ALZ_CENTRAL_DIRECTORY_HEADER: u32 = 0x015a_4c43; +///End of Central directory header +const ALZ_END_OF_CENTRAL_DIRECTORY_HEADER: u32 = 0x025a_4c43; + +/* ERRORS */ +#[derive(Debug, Clone)] +pub struct ALZParseError { + desc: String, +} + +impl fmt::Display for ALZParseError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.desc) + } +} + +impl error::Error for ALZParseError {} + +impl ALZParseError { + pub fn new(description: impl Into) -> Self { + Self { + desc: description.into(), + } + } + + #[must_use] + pub fn get_description(&self) -> String { + self.desc.to_string() + } +} + +#[derive(Debug, Clone)] +struct ALZUnsupportedError { + desc: String, +} + +impl fmt::Display for ALZUnsupportedError { + fn fmt(&self, f: &mut 
fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.desc) + } +} + +impl error::Error for ALZUnsupportedError {} + +impl ALZUnsupportedError { + const fn new(description: String) -> Self { + Self { desc: description } + } +} + +#[derive(Debug)] +struct ALZExtractError; +/* END ERRORS */ + + + +struct AlzLocalFileHeaderHead { + file_name_length: u16, + + file_attribute: u8, + + file_time_date: u32, + + file_descriptor: u8, + + unknown: u8, +} + +const ALZ_ENCR_HEADER_LEN: u32 = 12; + +struct AlzLocalFileHeader { + head: AlzLocalFileHeaderHead, + + compression_method: u8, + unknown: u8, + file_crc: u32, + + /* Can be smaller sizes, depending on file_descriptor/0x10 .*/ + compressed_size: u64, + uncompressed_size: u64, + + file_name: String, + + enc_chk: [u8; ALZ_ENCR_HEADER_LEN as usize], + + start_of_compressed_data: u64, +} + +#[allow(dead_code)] +enum AlzFileAttribute { + Readonly = 0x1, + Hidden = 0x2, + Directory = 0x10, + File = 0x20, +} + +impl AlzLocalFileHeader { + const fn is_encrypted(&self) -> bool { + 0 != (self.head.file_descriptor & 0x1) + } + + const fn is_data_descriptor(&self) -> bool { + 0 != (self.head.file_descriptor & 0x8) + } + + const fn is_directory(&self) -> bool { + 0 != ((AlzFileAttribute::Directory as u8) & self.head.file_attribute) + } + + const fn _is_file(&self) -> bool { + 0 != ((AlzFileAttribute::File as u8) & self.head.file_attribute) + } + + const fn _is_readonly(&self) -> bool { + 0 != ((AlzFileAttribute::Readonly as u8) & self.head.file_attribute) + } + + const fn _is_hidden(&self) -> bool { + 0 != ((AlzFileAttribute::Hidden as u8) & self.head.file_attribute) + } + + fn _dump(&self) { + println!( + "self.start_of_compressed_data = {}", + self.start_of_compressed_data + ); + + println!( + "self.head.file_name_length = {:x}", + self.head.file_name_length + ); + println!( + "self.head.file_attribute = {:02x}", + self.head.file_attribute + ); + println!("self.head.file_time_date = {:x}", self.head.file_time_date); + println!( + 
"self.head.file_descriptor = {:x}", + self.head.file_descriptor + ); + println!("self.head.unknown = {:x}", self.head.unknown); + + println!("self.compression_method = {:x}", self.compression_method); + println!("self.unknown = {:x}", self.unknown); + println!("self.file_crc = {:x}", self.file_crc); + println!("self.compressed_size = {:x}", self.compressed_size); + println!("self.uncompressed_size = {:x}", self.uncompressed_size); + + println!("self.file_name = {}", self.file_name); + + print!("self.enc_chk = "); + for i in 0..ALZ_ENCR_HEADER_LEN { + if 0 != i { + print!(" "); + } + print!("{}", self.enc_chk[i as usize]); + } + println!(); + + println!("is_encrypted = {}", self.is_encrypted()); + println!("is_data_descriptor = {}", self.is_data_descriptor()); + + println!(); + } + + pub const fn new() -> Self { + Self { + head: AlzLocalFileHeaderHead { + file_name_length: 0, + file_attribute: 0, + file_time_date: 0, + file_descriptor: 0, + unknown: 0, + }, + + compression_method: 0, + unknown: 0, + file_crc: 0, + compressed_size: 0, + uncompressed_size: 0, + file_name: String::new(), + enc_chk: [0; ALZ_ENCR_HEADER_LEN as usize], + start_of_compressed_data: 0, + } + } + + pub fn parse_internal( + &mut self, + cursor: &mut std::io::Cursor<&Vec>, + ) -> Result<(), Box> { + self.head.file_name_length = cursor.read_u16::()?; + self.head.file_attribute = cursor.read_u8()?; + self.head.file_time_date = cursor.read_u32::()?; + self.head.file_descriptor = cursor.read_u8()?; + self.head.unknown = cursor.read_u8()?; + + if 0 == self.head.file_name_length { + return Err(Box::new(ALZParseError::new("File Name Length is zero"))); + } + + let byte_len = self.head.file_descriptor / 0x10; + if byte_len > 0 { + self.compression_method = cursor.read_u8()?; + self.unknown = cursor.read_u8()?; + self.file_crc = cursor.read_u32::()?; + + match byte_len { + 1 => { + self.compressed_size = u64::from(cursor.read_u8()?); + self.uncompressed_size = u64::from(cursor.read_u8()?); + } + 2 => { 
+ self.compressed_size = u64::from(cursor.read_u16::()?); + self.uncompressed_size = u64::from(cursor.read_u16::()?); + } + 4 => { + self.compressed_size = u64::from(cursor.read_u32::()?); + self.uncompressed_size = u64::from(cursor.read_u32::()?); + } + 8 => { + self.compressed_size = cursor.read_u64::()?; + self.uncompressed_size = cursor.read_u64::()?; + } + _ => return Err(Box::new(ALZParseError::new("Unsupported File Descriptor"))), + } + } + + #[allow(clippy::cast_possible_truncation)] + let idx0: usize = cursor.position() as usize; + let idx1: usize = idx0 + self.head.file_name_length as usize; + + if idx1 > cursor.get_ref().len() { + return Err(Box::new(ALZParseError::new("Invalid file name length"))); + } + + let filename = &cursor.get_ref().as_slice()[idx0..idx1]; + cursor.set_position(idx1 as u64); + + self.file_name = String::from_utf8_lossy(filename).into_owned(); + + if self.is_encrypted() { + cursor.read_exact(&mut self.enc_chk)?; + } + + self.start_of_compressed_data = cursor.position(); + cursor.set_position(self.start_of_compressed_data + self.compressed_size); + + if self.start_of_compressed_data + self.compressed_size > cursor.get_ref().len() as u64 { + return Err(Box::new(ALZParseError::new( + "Invalid compressed data length", + ))); + } + + Ok(()) + } + + pub fn parse(&mut self, cursor: &mut std::io::Cursor<&Vec>) -> Result<(), ALZParseError> { + let result = self.parse_internal(cursor); + if result.is_err() { + let e = result.err().unwrap(); + if e.is::() { + let ape: ALZParseError = *e.downcast::().unwrap(); + return Err(ape); + } + return Err(ALZParseError::new("Not ALZ")); + } + + Ok(()) + } + + pub fn is_supported(&self) -> Result<(), ALZUnsupportedError> { + if self.is_encrypted() { + return Err(ALZUnsupportedError::new( + "Encryption Unsupported".to_string(), + )); + } + + if self.is_data_descriptor() { + return Err(ALZUnsupportedError::new( + "Data Descriptors are Unsupported".to_string(), + )); + } + + Ok(()) + } + + + /* + * This has 
no header/checksum validation. + */ + fn extract_file_deflate( + &mut self, + cursor: &std::io::Cursor<&Vec>, + files: &mut Vec, + ) -> Result<(), ALZExtractError> { + + #[allow(clippy::cast_possible_truncation)] + let start: usize = self.start_of_compressed_data as usize; + #[allow(clippy::cast_possible_truncation)] + let end: usize = start + self.compressed_size as usize; + if end >= cursor.get_ref().len() { + return Err(ALZExtractError {}); + } + let data: &[u8] = &cursor.get_ref().as_slice()[start..end]; + + let mut inflater = InflateStream::new(); + let mut out: Vec = Vec::::new(); + let mut n: usize = 0; + + while n < data.len() { + let res = inflater.update(&data[n..]); + if let Ok((num_bytes_read, result)) = res { + n += num_bytes_read; + out.extend(result.iter().copied()); + } else { + return Err(ALZExtractError {}); + } + } + + self.write_file(&out, files); + + Ok(()) + } + + fn write_file(&mut self, buffer: &[u8], files: &mut Vec) { + let extracted_file: ExtractedFile = ExtractedFile { + name: Some(self.file_name.to_string()), + data: buffer.to_vec(), + }; + + files.push(extracted_file); + } + + fn extract_file_nocomp( + &mut self, + cursor: &mut std::io::Cursor<&Vec>, + files: &mut Vec, + ) -> Result<(), ALZExtractError> { + #[allow(clippy::cast_possible_truncation)] + let idx0: usize = self.start_of_compressed_data as usize; + + let mut len = self.compressed_size; + if self.compressed_size != self.uncompressed_size { + info!("Uncompressed file has different lengths for compressed vs uncompressed, using the shorter"); + if self.compressed_size > self.uncompressed_size { + len = self.uncompressed_size; + } + } + + #[allow(clippy::cast_possible_truncation)] + let idx1: usize = idx0 + len as usize; + if idx1 > cursor.get_ref().len() { + info!("Invalid data length"); + return Err(ALZExtractError {}); + } + + let contents = &cursor.get_ref().as_slice()[idx0..idx1]; + cursor.set_position(idx1 as u64); + + self.write_file(contents, files); + Ok(()) + } + + fn 
extract_file_bzip2( + &mut self, + cursor: &std::io::Cursor<&Vec>, + files: &mut Vec, + ) -> Result<(), ALZExtractError> { + #[allow(clippy::cast_possible_truncation)] + let idx0: usize = self.start_of_compressed_data as usize; + #[allow(clippy::cast_possible_truncation)] + let idx1: usize = idx0 + self.compressed_size as usize; + + let contents = &cursor.get_ref().as_slice()[idx0..idx1]; + + /* + * Create vector of the needed capacity. + */ + let mut out: Vec = Vec::new(); + for _i in 0..self.uncompressed_size { + out.push(0); + } + + let mut decompressor = BzDecoder::new(contents); + let ret = decompressor.read_exact(&mut out); + if ret.is_err() { + info!("Unable to decompress bz2 data"); + return Err(ALZExtractError {}); + } + + self.write_file(&out, files); + Ok(()) + } + + fn extract_file( + &mut self, + cursor: &mut std::io::Cursor<&Vec>, + files: &mut Vec, + ) -> Result<(), ALZExtractError> { + const ALZ_COMP_NOCOMP: u8 = 0; + const ALZ_COMP_BZIP2: u8 = 1; + const ALZ_COMP_DEFLATE: u8 = 2; + + match self.compression_method { + ALZ_COMP_NOCOMP => { + self.extract_file_nocomp(cursor, files) + } + ALZ_COMP_BZIP2 => { + self.extract_file_bzip2(cursor, files) + }, + ALZ_COMP_DEFLATE => { + self.extract_file_deflate(cursor, files) + }, + _ => Err(ALZExtractError {}), + } + } + +} + +/*TODO: Merge this with the onenote extracted_file struct, and use the same one everywhere.*/ +pub struct ExtractedFile { + pub name: Option, + pub data: Vec, +} + +#[derive(Default)] +pub struct Alz { + pub embedded_files: Vec, +} + +impl<'aa> Alz { + + /* Check for the ALZ file header. 
*/ + #[allow(clippy::unused_self)] + fn is_alz(&self, cursor: &mut std::io::Cursor<&Vec>) -> bool { + cursor.read_u32::().map_or(false, |n| ALZ_FILE_HEADER == n) + } + + fn parse_local_fileheader(&mut self, cursor: &mut std::io::Cursor<&Vec>) -> bool { + let mut local_fileheader = AlzLocalFileHeader::new(); + + let parse_result = local_fileheader.parse(cursor); + if parse_result.is_err() { + /*This is safe, since I called is_err*/ + let e: ALZParseError = parse_result.err().unwrap(); + info!("{}", e.get_description()); + return false; + } + + if let Err(err) = local_fileheader.is_supported() { + info!("{err}"); + return false; + } + + if !local_fileheader.is_directory() { + /* The is_file flag doesn't appear to always be set, so we'll just assume it's a file if + * it's not marked as a directory.*/ + let res2 = local_fileheader.extract_file(cursor, &mut self.embedded_files); + if res2.is_err() { + return false; + } + } + + true + } + + #[allow(clippy::unused_self)] + fn parse_central_directoryheader(&self, cursor: &mut std::io::Cursor<&Vec>) -> bool { + /* + * This is ignored in unalz (UnAlz.cpp ReadCentralDirectoryStructure). + * + * It actually reads 12 bytes, and I think it happens to work because EOF is hit on the next + * read, which it does not consider an error. + */ + let ret = cursor.read_u64::(); + ret.is_ok() + } + + #[must_use] + pub const fn new() -> Self { + Self { + embedded_files: Vec::new(), + } + } + + /// # Errors + /// Will return `ALZParseError` if file headers are not correct or are inconsistent. + pub fn from_bytes(bytes: &'aa [u8]) -> Result { + let binding = bytes.to_vec(); + let mut cursor = Cursor::new(&binding); + + let mut alz: Self = Self::new(); + + if !alz.is_alz(&mut cursor) { + return Err(ALZParseError::new("No ALZ file header")); + } + + //What these bytes are supposed to be in unspecified, but they need to be there. 
+ let ret = cursor.read_u32::(); + if ret.is_err() { + return Err(ALZParseError::new("Error reading uint32 from file")); + } + + loop { + let Ok(sig) = cursor.read_u32::() else { break; }; + + match sig { + ALZ_LOCAL_FILE_HEADER => { + if alz.parse_local_fileheader(&mut cursor) { + continue; + } + } + ALZ_CENTRAL_DIRECTORY_HEADER => { + if alz.parse_central_directoryheader(&mut cursor) { + continue; + } + } + ALZ_END_OF_CENTRAL_DIRECTORY_HEADER => { + break; + /*This is the end, nothing really to do here.*/ + } + _ => { + #[allow(clippy::uninlined_format_args)] + return Err(ALZParseError::new(format!( + "Parse Error, unrecognized sig = '{:x}'", + sig + ))); + } + } + } + + Ok(alz) + } +} diff --git a/libclamav_rust/src/lib.rs b/libclamav_rust/src/lib.rs index 3a19b84b50..2cd1399e9d 100644 --- a/libclamav_rust/src/lib.rs +++ b/libclamav_rust/src/lib.rs @@ -34,3 +34,5 @@ pub mod logging; pub mod onenote; pub mod scanners; pub mod util; + +pub mod alz; diff --git a/libclamav_rust/src/scanners.rs b/libclamav_rust/src/scanners.rs index 44795f195d..9649211cf9 100644 --- a/libclamav_rust/src/scanners.rs +++ b/libclamav_rust/src/scanners.rs @@ -20,6 +20,13 @@ * MA 02110-1301, USA. */ + + + //Rust error handling This error. 
+
+
+
+
 use std::{
     ffi::{c_char, CString},
     path::Path,
@@ -32,9 +39,10 @@ use log::{debug, error, warn};
 use crate::{
     ctx,
     onenote::OneNote,
+    alz::Alz,
     sys::{cl_error_t, cl_error_t_CL_ERROR, cl_error_t_CL_SUCCESS, cli_ctx, cli_magic_scan_buff},
 };
 
 /// Rust wrapper of libclamav's cli_magic_scan_buff() function.
 /// Use magic sigs to identify the file type and then scan it.
 fn magic_scan(ctx: *mut cli_ctx, buf: &[u8], name: Option<String>) -> cl_error_t {
@@ -127,3 +135,50 @@ pub unsafe extern "C" fn scan_onenote(ctx: *mut cli_ctx) -> cl_error_t {
 
     scan_result
 }
+
+/// Scan an Alz file for attachments
+///
+/// Parses the current fmap as an ALZ archive and runs a magic scan on every
+/// file extracted from it, returning the first non-success scan result.
+///
+/// # Safety
+///
+/// Must be a valid ctx pointer.
+#[no_mangle]
+pub unsafe extern "C" fn extract_alz(ctx: *mut cli_ctx) -> cl_error_t {
+    let fmap = match ctx::current_fmap(ctx) {
+        Ok(fmap) => fmap,
+        Err(e) => {
+            warn!("Error getting FMap from ctx: {e}");
+            return cl_error_t_CL_ERROR;
+        }
+    };
+
+    let file_bytes = match fmap.need_off(0, fmap.len()) {
+        Ok(bytes) => bytes,
+        Err(err) => {
+            error!(
+                "Failed to get file bytes for fmap of size {}: {err}",
+                fmap.len()
+            );
+            return cl_error_t_CL_ERROR;
+        }
+    };
+
+    let alz = match Alz::from_bytes(file_bytes) {
+        Ok(x) => x,
+        Err(err) => {
+            error!("Failed to parse Alz file: {err}");
+            return cl_error_t_CL_ERROR;
+        }
+    };
+
+    for file in &alz.embedded_files {
+        let ret = magic_scan(ctx, &file.data, file.name.clone());
+        if ret != cl_error_t_CL_SUCCESS {
+            return ret;
+        }
+    }
+
+    cl_error_t_CL_SUCCESS
+}
diff --git a/unit_tests/clamscan/alz_test.py b/unit_tests/clamscan/alz_test.py
new file mode 100644
index 0000000000..78b294b339
--- /dev/null
+++ b/unit_tests/clamscan/alz_test.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2020-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+
+"""
+Run clamscan tests.
+"""
+
+import sys
+
+sys.path.append('../unit_tests')
+import testcase
+
+
+class TC(testcase.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        super(TC, cls).setUpClass()
+
+    @classmethod
+    def tearDownClass(cls):
+        super(TC, cls).tearDownClass()
+
+    def setUp(self):
+        super(TC, self).setUp()
+
+    def tearDown(self):
+        super(TC, self).tearDown()
+        self.verify_valgrind_log()
+
+    def _scan_and_expect(self, archive_name, expected_signature):
+        """Scan unit_tests/input/other_scanfiles/alz/<archive_name> with alz.hdb
+        loaded and check that clamscan reports <expected_signature> as FOUND."""
+        testfile = TC.path_source / 'unit_tests' / 'input' / 'other_scanfiles' / 'alz' / archive_name
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfile}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
+            path_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'alz.hdb',
+            testfile=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus
+
+        self.verify_output(output.out, expected=['{} FOUND'.format(expected_signature)])
+
+    def test_deflate(self):
+        self.step_name('Test alz files compressed with deflate (gzip)')
+        self._scan_and_expect('deflate.alz', 'ALZ_TEST_FILE.UNOFFICIAL')
+
+    def test_bzip2(self):
+        self.step_name('Test alz files compressed with bzip2')
+        self._scan_and_expect('bzip2.alz', 'ALZ_TEST_FILE.UNOFFICIAL')
+
+    def test_bzip2_with_binary(self):
+        self.step_name('Test alz files compressed with bzip2 with binary data')
+        self._scan_and_expect('bzip2.bin.alz', 'ALZ_TEST_FILE_EXECUTABLE.UNOFFICIAL')
+
+    def test_uncompressed(self):
+        self.step_name('Test alz files with no compression')
+        self._scan_and_expect('uncompressed.alz', 'ALZ_TEST_FILE.UNOFFICIAL')
+
+    def test_uncompressed_with_binary(self):
+        self.step_name('Test alz files with no compression with binary data')
+        self._scan_and_expect('uncompressed.bin.alz', 'ALZ_TEST_FILE_EXECUTABLE.UNOFFICIAL')
diff --git 
a/unit_tests/input/other_scanfiles/alz/bzip2.bin.alz b/unit_tests/input/other_scanfiles/alz/bzip2.bin.alz new file mode 100644 index 0000000000..71d2389e45 Binary files /dev/null and b/unit_tests/input/other_scanfiles/alz/bzip2.bin.alz differ diff --git a/unit_tests/input/other_scanfiles/alz/deflate.alz b/unit_tests/input/other_scanfiles/alz/deflate.alz new file mode 100644 index 0000000000..1114dab3d7 Binary files /dev/null and b/unit_tests/input/other_scanfiles/alz/deflate.alz differ diff --git a/unit_tests/input/other_scanfiles/alz/uncompressed.alz b/unit_tests/input/other_scanfiles/alz/uncompressed.alz new file mode 100644 index 0000000000..5efb02264f Binary files /dev/null and b/unit_tests/input/other_scanfiles/alz/uncompressed.alz differ diff --git a/unit_tests/input/other_scanfiles/alz/uncompressed.bin.alz b/unit_tests/input/other_scanfiles/alz/uncompressed.bin.alz new file mode 100644 index 0000000000..6cf38a4c9e Binary files /dev/null and b/unit_tests/input/other_scanfiles/alz/uncompressed.bin.alz differ diff --git a/unit_tests/input/other_sigs/alz.hdb b/unit_tests/input/other_sigs/alz.hdb new file mode 100644 index 0000000000..c8cfb7e943 --- /dev/null +++ b/unit_tests/input/other_sigs/alz.hdb @@ -0,0 +1,13 @@ +03d4c70e6aa3832fa51959137b6a3fc55d8b9f55:16:ALZ_TEST_FILE +05cf0585be97d8f544f034c7e46cf98778925c66:13:ALZ_TEST_FILE +12f41f69d25d0ba9b73da429e0d69b27c95522db:16:ALZ_TEST_FILE +24578375a0454c0657bac54084b50fdda1efaa21:11:ALZ_TEST_FILE +26c0e077ad49260d416dbf449569efbc7ce02448:16:ALZ_TEST_FILE +33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d:12:ALZ_TEST_FILE +67b33eebc1e4537d839bc6b04affd6b06074c746:13:ALZ_TEST_FILE +6847c9c6e9218691910a0d7e36ac544149e3ce7d:13:ALZ_TEST_FILE +9645df16bc733a92129563ad2e5f1f6a9ed483c9:16:ALZ_TEST_FILE +9ac5483905f6c4b72c314c901ceb5eca2fee95c3:13:ALZ_TEST_FILE +cb9431a94ca1d5c64d9a1e467c543905f592f351:13:ALZ_TEST_FILE +ce5cec9fef4940d0d1fe2bc5004b14d7f8fc290c:77:ALZ_TEST_FILE 
+edf6cd48d7b44a6cc0a96a6139cfe020865f8c4c:16712:ALZ_TEST_FILE_EXECUTABLE