diff --git a/CLAM-2256/Cargo.lock b/CLAM-2256/Cargo.lock new file mode 100644 index 0000000000..da0a69af58 --- /dev/null +++ b/CLAM-2256/Cargo.lock @@ -0,0 +1,123 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "cursor" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a48749e67c8bd54d6d4b2a191afaf9bd9d656d22c0852698c990a354fb86fd2c" + +[[package]] +name = "deflate" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c86f7e25f518f4b81808a2cf1c50996a61f5c2eb394b2393bd87f2a4780a432f" +dependencies = [ + "adler32", +] + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "open_alz" +version = "0.1.0" +dependencies = [ + "byteorder", + "bzip2", + "cursor", + "deflate", + "flate2", +] + +[[package]] +name = "pkg-config" +version = "0.3.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" diff --git a/CLAM-2256/Cargo.toml b/CLAM-2256/Cargo.toml new file mode 100644 index 0000000000..fef8318199 --- /dev/null +++ b/CLAM-2256/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "open_alz" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +byteorder = "1.5.0" +bzip2 = "0.4.4" +cursor = "2.3.0" +deflate = "1.0.0" +flate2 = "1.0.28" + +[workspace] diff --git a/CLAM-2256/alz_example.alz b/CLAM-2256/alz_example.alz new file mode 100644 index 0000000000..f6b2a5d5d0 Binary files /dev/null and b/CLAM-2256/alz_example.alz differ diff --git a/CLAM-2256/run.sh b/CLAM-2256/run.sh new file mode 100755 index 0000000000..8398772c11 --- /dev/null +++ b/CLAM-2256/run.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +rm -rf outDir + +#cargo run alz_example.alz outDir +#cargo run test outDir +#cargo run + +#extracts 1 jpg, mine works the same as unalz +#cargo run samples/19d17e940e603b7fec36f147b3f8ae482cac429fdcc762f61186d4c00adce8db outDir + +#cargo run samples/0abac736fb1b1dbd66901ec5258c73c543dd6b01e8d4c06970f94ff697ae1745 outDir + +#cargo run samples/e81e2bb0fa7c00c849465e10329c01f5b40ef66c48a5fa12bee24279411aa297 outDir + + +#FILE=5d21b3f56ed5e349631024ac08006622fa989cfb7bec4ad13c946d331bbffe47 corrupted +#FILE=7b1f0ecdbf1d10e6ee78510901f418c9ec7787b4caf13ac87b5da542d9a26383 corrupted +#FILE=b6461509e990d556579688a758d4018919b0087435f9147a48ef813c865e0b3d corrupted +#FILE=b47d5aa5a024f841b77f20d0d2d8410d1b411aaddd54624e94667e610c1ceabe corrupted +#FILE=e8cedbeec4eb5a9715c514da5c5ceeec375239220750cdff99e4f87e49c7c8ad corrupted +#FILE=7bbf9dafd68b5b5106b5996eaac419228d72b4872707fc2a611a468319bef509 corrupted +#FILE=72a0718760f744a67853f3ee7f5740a34db124e24f787fc61efdeb538fa30376 corrupted +#FILE=ecca4711802697a96528c8bade5158cca011b1759988aad1e44b10fb9889f8aa corrupted +#FILE=8bb3fc1d6a5e703b2a5037ea3a168b45a8b641bcfd337d3bf67152ec353affb6 corrupted +#FILE=e617b015136b23dcd3eb299ca4114b3e19a3b4ee9c4220f5dc7dd529165a45ab corrupted +#FILE=f0fb18a36848e4c12414c101a3505c4f2a74e0b476f770c5a36a47742629c379 corrupted +#FILE=fe6106acdfcc2fd821814801d8f850cfdd08d901216b52463bd7d6e2ca6fc6d8 corrupted +#FILE=f9a5b18c7d15efe4d3db5a0b5259edbaa8917fc36cec99a7571d5192aa6cff1f corrupted +#FILE=58ac36b24ecdbe6726ce2bda0b308b0273f61e8bad858339409046f93d7478df corrupted +#FILE=8cbb8f7ae044db16671003c6c3bbf063b43dc8520f503e66c49360269b4e154b corrupted +#FILE=6c3e1563ce4235720c73de8297cd3aa84959c28b9c8040ac227154920183aeb4 corrupted +#FILE=86b5e5c78a8de95510cd2fbc4dddf66e824a82412bdfb1d67ca504a51d8f1eac corrupted +#FILE=7fc7b135ed44a3f201cfac31945bf7c3007464634b5b8b464c13d87ca6f7bbea corrupted + +#cargo run samples/$FILE outDir + +FILE=unit_tests/deflate.alz +FILE=unit_tests/uncompressed.alz +FILE=unit_tests/bzip2.alz +FILE=unit_tests/bzip2.bin.alz +FILE=unit_tests/uncompressed.bin.alz + +rm -rf outDir unalz + +unalz -d unalz $FILE + +cargo run $FILE outDir + + + + + + + diff --git a/CLAM-2256/src/main.rs b/CLAM-2256/src/main.rs new file mode 100644 index 0000000000..c87aa6447d --- /dev/null +++ b/CLAM-2256/src/main.rs @@ -0,0 +1,732 @@ +use std::fs; +//use std::io; +use std::io::Cursor; +use byteorder::{LittleEndian, ReadBytesExt}; +//use std::mem::size_of; + +use std::fs::File; +use std::fs::create_dir_all; +use std::io::Write; +use std::path::Path; + +//use bzip2::Compression; +//use bzip2::read::{BzEncoder, BzDecoder}; +use bzip2::read::{BzDecoder}; + +//use deflate::deflate_bytes; +//use flate2::Decompress; +//use flate2::FlushDecompress; +/*There is also a MultiGzDecoder, but I think this is the one we want + * because of having to create the header manually.*/ +use flate2::read::GzDecoder; + +//use flate2::write::{GzEncoder}; +//use flate2::read::{GzDecoder}; + + +use std::io::Read; + +#[derive(Debug)] +struct ALZParseError { +} + +#[derive(Debug)] +struct ALZExtractError { +} + +const ALZ_FILE_HEADER: u32 = 0x015a4c41; +const ALZ_LOCAL_FILE_HEADER: u32 = 0x015a4c42; +const ALZ_CENTRAL_DIRECTORY_HEADER: u32 = 0x015a4c43; +const ALZ_END_OF_CENTRAL_DIRECTORY_HEADER : u32 = 0x025a4c43; + + +struct AlzLocalFileHeaderHead { + _file_name_length: u16, + + _file_attribute: u8, + + _file_time_date: u32, + + _file_descriptor: u8, + + _unknown: u8, + +} + +const ALZ_ENCR_HEADER_LEN: u32 = 12; + +struct AlzLocalFileHeader { + _head: AlzLocalFileHeaderHead, + + _compression_method: u8, + _unknown: u8, + _file_crc: u32, + + /* Can be smaller sizes, depending on _file_descriptor/0x10 .*/ + _compressed_size: u64, + _uncompressed_size: u64, + + _file_name: String, + + _enc_chk: [u8; ALZ_ENCR_HEADER_LEN as usize], + + _start_of_compressed_data: u64, +} + + +enum AlzFileAttribute { + _AlzFileAttributeReadonly = 0x1, + _AlzFileAttributeHidden = 0x2, + _AlzFileAttributeDirectory = 0x10, + _AlzFileAttributeFile = 0x20, +} + +impl AlzLocalFileHeader { + fn is_encrypted(&self) -> bool { + return 0 != (self._head._file_descriptor & 0x1 ); + } + + fn is_data_descriptor(&self) -> bool { + return 0 != (self._head._file_descriptor & 0x8 ); + } + + fn is_directory(&self) -> bool { + return 0 != ((AlzFileAttribute::_AlzFileAttributeDirectory as u8) & self._head._file_attribute); + } + + fn _is_file(&self) -> bool { + return 0 != ((AlzFileAttribute::_AlzFileAttributeFile as u8) & self._head._file_attribute); + } + + fn _is_readonly(&self) -> bool { + return 0 != ((AlzFileAttribute::_AlzFileAttributeReadonly as u8) & self._head._file_attribute); + } + + fn _is_hidden(&self) -> bool { + return 0 != ((AlzFileAttribute::_AlzFileAttributeHidden as u8) & self._head._file_attribute); + } + + fn _dump(&self) { + println!("self._start_of_compressed_data = {}", self._start_of_compressed_data ); + + println!("self._head._file_name_length = {:x}", self._head._file_name_length); + println!("self._head._file_attribute = {:02x}", self._head._file_attribute); + println!("self._head._file_time_date = {:x}", self._head._file_time_date); + println!("self._head._file_descriptor = {:x}", self._head._file_descriptor); + println!("self._head._unknown = {:x}", self._head._unknown); + + println!("self._compression_method = {:x}", self._compression_method); + println!("self._unknown = {:x}", self._unknown); + println!("self._file_crc = {:x}", self._file_crc); + println!("self._compressed_size = {:x}", self._compressed_size); + println!("self._uncompressed_size = {:x}", self._uncompressed_size); + + println!("self._file_name = {}", self._file_name); + + print!("self._enc_chk = "); + for i in 0..ALZ_ENCR_HEADER_LEN { + if 0 != i { + print!(" "); + } + print!("{}", self._enc_chk[i as usize]); + } + println!(""); + + + println!("is_encrypted = {}", self.is_encrypted()); + println!("is_data_descriptor = {}", self.is_data_descriptor()); + + println!(""); + + } + + pub fn new() -> Self { + + Self { + _head : AlzLocalFileHeaderHead { + _file_name_length : 0, + _file_attribute : 0, + _file_time_date: 0, + _file_descriptor : 0, + _unknown : 0, + }, + + _compression_method : 0, + _unknown : 0, + _file_crc : 0, + _compressed_size : 0, + _uncompressed_size : 0, + _file_name : "".to_string(), + _enc_chk: [0; ALZ_ENCR_HEADER_LEN as usize], + _start_of_compressed_data: 0, + } + } + + pub fn parse( &mut self, cursor: &mut std::io::Cursor<&Vec> ) -> Result<(), ALZParseError> { + /* + * TODO: Should probably rename this to parse_header or something. + */ + + let mut tu16 = cursor.read_u16::(); + if tu16.is_err(){ + return Err(ALZParseError{}); + } + self._head._file_name_length = tu16.unwrap(); + + let mut tu8 = cursor.read_u8::<>(); + if tu8.is_err() { + return Err(ALZParseError{}); + } + self._head._file_attribute = tu8.unwrap(); + + let mut tu32 = cursor.read_u32::(); + if tu32.is_err() { + return Err(ALZParseError{}); + } + self._head._file_time_date = tu32.unwrap(); + + tu8 = cursor.read_u8::<>(); + if tu8.is_err() { + return Err(ALZParseError{}); + } + self._head._file_descriptor = tu8.unwrap(); + + tu8 = cursor.read_u8::<>(); + if tu8.is_err() { + return Err(ALZParseError{}); + } + self._head._unknown = tu8.unwrap(); + + if 0 == self._head._file_name_length { + println!("Filename length cannot be zero"); + return Err(ALZParseError{}); + } + + let byte_len = self._head._file_descriptor / 0x10; + if byte_len > 0 { + + tu8 = cursor.read_u8::<>(); + if tu8.is_err() { + return Err(ALZParseError{}); + } + self._compression_method = tu8.unwrap(); + + tu8 = cursor.read_u8::<>(); + if tu8.is_err() { + return Err(ALZParseError{}); + } + self._unknown = tu8.unwrap(); + + tu32 = cursor.read_u32::(); + if tu32.is_err() { + return Err(ALZParseError{}); + } + self._file_crc = tu32.unwrap(); + + match byte_len { + 1 => { + tu8 = cursor.read_u8::<>(); + if tu8.is_err() { + return Err(ALZParseError{}); + } + self._compressed_size = tu8.unwrap() as u64; + + tu8 = cursor.read_u8::<>(); + if tu8.is_err() { + return Err(ALZParseError{}); + } + self._uncompressed_size = tu8.unwrap() as u64; + }, + 2 => { + tu16 = cursor.read_u16::(); + if tu16.is_err() { + return Err(ALZParseError{}); + } + self._compressed_size = tu16.unwrap() as u64; + + tu16 = cursor.read_u16::(); + if tu16.is_err() { + return Err(ALZParseError{}); + } + self._uncompressed_size = tu16.unwrap() as u64; + + }, + 4 => { + tu32 = cursor.read_u32::(); + if tu32.is_err() { + return Err(ALZParseError{}); + } + self._compressed_size = tu32.unwrap() as u64; + + tu32 = cursor.read_u32::(); + if tu32.is_err() { + return Err(ALZParseError{}); + } + self._uncompressed_size = tu32.unwrap() as u64; + }, + 8 => { + let mut tu64 = cursor.read_u64::(); + if tu64.is_err() { + return Err(ALZParseError{}); + } + self._compressed_size = tu64.unwrap() as u64; + + tu64 = cursor.read_u64::(); + if tu64.is_err() { + return Err(ALZParseError{}); + } + self._uncompressed_size = tu64.unwrap() as u64; + }, + _ => return Err(ALZParseError{}), + } +// assert!(self.is_file(), "NOT A FILE"); + } else { +// println!("DON'T THINK THIS IS EVER POSSIBLE, SEE IF IT COMES OUT IN TESTING!!!!!"); +// assert!(false, "EXITING HERE"); + /* + * TODO: In 'unalz', (UnAlz.cpp, CUnAlz::ReadLocalFileheader), the condition where + * byte_len (byteLen) is zero is treated as a condition that can be ignored, and + * processing can continue. I think it's probably a parse error when that + * happens and it never causes an issue because the file then fails crc and an error is + * reported, rather than just stopping parsing when that happens. I would like to look + * for a file that has that condition and see if unalz (or other unpackers) are able to + * extract anything from the file. If not, we can just return here. + * + * + * NOT THE CASE + */ + + println!("I DIDN'T THINK THIS WAS POSSIBLE, CHECKING FOR DIRECTORY"); + if self.is_directory() { + println!("THIS IS A DIRECTORY"); + } else { +// self._dump(); + println!("THIS IS NOT A DIRECTORY"); + } + } + +// if self._head._file_name_length as usize >= cursor.get_ref().len() { +// return Err(ALZParseError{}); +// } + + let mut filename: Vec = Vec::new(); + /*TODO: Figure out the correct way to allocate a vector of dynamic size and call + * cursor.read_exact, instead of having a loop of reads.*/ + for _i in 0..self._head._file_name_length { + let ret = cursor.read_u8::<>(); + if ret.is_err() { + println!("Error reading contents of the file name"); + return Err(ALZParseError{}); + } + + filename.push(ret.unwrap()); + + } + + /* + let ret = String::from_utf8(filename); + if ret.is_err(){ + assert!(false, "not utf8"); + } + self._file_name = ret.unwrap(); + */ + println!("TODO: Figure out how to add other code pages"); + self._file_name = String::from_utf8_lossy(&filename).into_owned(); + + if self.is_encrypted() { + if ALZ_ENCR_HEADER_LEN as usize > cursor.get_ref().len() { + return Err(ALZParseError{}); + } + + /*TODO: Is it safe to call unwrap here, since I already checked that there are enough + * bytes? + */ + cursor.read_exact(&mut self._enc_chk).unwrap(); + } + + self._start_of_compressed_data = cursor.position(); + + cursor.set_position(self._start_of_compressed_data + self._compressed_size); + + if self.is_encrypted() { + assert!(false, "ENCRYPTION UNIMPLEMENTED"); + } + + if self.is_data_descriptor() { + assert!(false, "IS DATA DESCRIPTOR UNIMPLEMENTED"); + } + + return Ok(()); + } + //31456954 + + fn extract_file_deflate(&mut self, cursor: &mut std::io::Cursor<&Vec>, out_dir: &String) -> Result<(), ALZExtractError>{ + cursor.set_position(self._start_of_compressed_data); + + let mut contents: Vec = Vec::new(); + + //Gzip file header format. + //https://en.wikipedia.org/wiki/Gzip + //https://www.rfc-editor.org/rfc/rfc1952.html + //https://www.ietf.org/rfc/rfc1952.txt + + //magic number + contents.push(0x1f); + contents.push(0x8b ); + + //compression method (0-7 reserved, 0x8 for deflate) + contents.push(0x08 ); + + //header flags + contents.push(0); + + //timestamp, doesn't matter what it is. + contents.push(0); + contents.push(0); + contents.push(0); + contents.push(0); + + //compression flags + contents.push(0x00); + + //operating system id + contents.push(0); + + /*TODO: Figure out the correct way to allocate a vector of dynamic size and call + * cursor.read_exact, instead of having a loop of reads.*/ + for _i in 0..self._compressed_size { + let ret = cursor.read_u8::<>(); + if ret.is_err() { + /* + println!("Cannot read full amount of data (deflate)"); + println!("_i = {}", _i); + println!("self._file_name = {}", self._file_name); + println!("self._compressed_size = {}", self._compressed_size); + println!("self._uncompressed_size = {}", self._uncompressed_size); + println!("cursor.position() = {}", cursor.position()); + + for j in 0..contents.len() { + print!("{:02x} ", contents[j]); + } + println!(""); + */ + +//println!("TODO: Figure out how to not write the beginning of 'contents' without doing a copy"); + let _ = self.write_file(out_dir, &mut contents); + println!("TODO: put a note in the metadata.json file that this file is incomplete/not decrypted"); + + return Err(ALZExtractError{}); + } + + contents.push( ret.unwrap()); + } + + //checksum of the original uncompressed data. (Get it from the FILE HEADER) + let mut bytes = self._file_crc.to_le_bytes(); + for i in 0..4{ + contents.push(bytes[i]); + } + + //length of the original uncompressed data. + bytes = (self._uncompressed_size as u32).to_le_bytes(); + for i in 0..4{ + contents.push(bytes[i]); + } + + let mut d = GzDecoder::new(&*contents); + let mut buffer: Vec = Vec::new(); + let ret = d.read_to_end(&mut buffer); + if ret.is_err() { + assert!(false, "ERROR in decompress"); + } + + + + + /* + let mut temp: String = String::from(out_dir); + temp.push('/'); + temp.push_str(&self._file_name); + temp = temp.replace("\\", "/"); + + let p = Path::new(&temp); + let ret = create_dir_all(p.parent().unwrap()); + if ret.is_err() { + assert!(false, "Cannot create directory, try and just write the file in the base directory"); + } + + let out_ret = File::create(&temp); + + if out_ret.is_err() { + assert!(false, "Error creating output file"); + } + + let mut out = out_ret.unwrap(); + + let write_ret = out.write_all(&buffer); + if write_ret.is_err() { + assert!(false, "Error writing to file"); + } + return Ok(()); + */ + return self.write_file(out_dir, &mut buffer); + } + + + fn write_file(&mut self, out_dir: &String, buffer: &mut Vec) -> Result<(), ALZExtractError>{ + let mut temp: String = String::from(out_dir); + temp.push('/'); + temp.push_str(&self._file_name); + temp = temp.replace("\\", "/"); + + let p = Path::new(&temp); + let ret = create_dir_all(p.parent().unwrap()); + if ret.is_err() { + assert!(false, "Cannot create directory, try and just write the file in the base directory"); + return Err(ALZExtractError{}); + } + + let out_ret = File::create(&temp); + + if out_ret.is_err() { + assert!(false, "Error creating output file"); + return Err(ALZExtractError{}); + } + + let mut out = out_ret.unwrap(); + + let write_ret = out.write_all(&buffer); + if write_ret.is_err() { + assert!(false, "Error writing to file"); + return Err(ALZExtractError{}); + } + + return Ok(()); + } + + fn extract_file_nocomp(&mut self, cursor: &mut std::io::Cursor<&Vec>, out_dir: &String) -> Result<(), ALZExtractError>{ + let mut contents: Vec = Vec::new(); + cursor.set_position(self._start_of_compressed_data); + + if self._compressed_size != self._uncompressed_size { + assert!(false, "Consider ignoring this and just writing the minimum number of bytes"); + return Err(ALZExtractError{}); + } + + /*TODO: Figure out the correct way to allocate a vector of dynamic size and call + * cursor.read_exact, instead of having a loop of reads.*/ + for _i in 0..self._compressed_size { + let ret = cursor.read_u8::<>(); + if ret.is_err() { + println!("Cannot read full amount of data (nocomp)"); + return Err(ALZExtractError{}); + } + + contents.push( ret.unwrap()); + } + + return self.write_file(out_dir, &mut contents); + } + + fn extract_file_bzip2(&mut self, cursor: &mut std::io::Cursor<&Vec>, out_dir: &String) -> Result<(), ALZExtractError>{ + + let mut contents: Vec = Vec::new(); + cursor.set_position(self._start_of_compressed_data); + + /*TODO: Figure out the correct way to allocate a vector of dynamic size and call + * cursor.read_exact, instead of having a loop of reads.*/ + for _i in 0..self._compressed_size { + let ret = cursor.read_u8::<>(); + if ret.is_err() { + println!("Cannot read full amount of data (bzip2)"); + return Err(ALZExtractError{}); + } + + contents.push( ret.unwrap()); + } + + /*TODO: Figure out why I have to do this, and can't just call + * Vec::with_capacity(self._uncompressed_size) + */ + let mut out: Vec = Vec::new(); + for _i in 0..self._uncompressed_size { + out.push(0); + } + + let mut decompressor = BzDecoder::new(&*contents); + let res = decompressor.read_exact(&mut out); + if res.is_err(){ + assert!(false, "Error decompressing bz2 file"); + } + + return self.write_file(out_dir, &mut out); + } + + fn extract_file(&mut self, cursor: &mut std::io::Cursor<&Vec>, out_dir: &String) -> Result<(), ALZExtractError>{ + const ALZ_COMP_NOCOMP: u8 = 0; + const ALZ_COMP_BZIP2: u8 = 1; + const ALZ_COMP_DEFLATE: u8 = 2; + + /*TODO: Consider extracting encrypted data to separate files. Maybe + * someone is interested in signaturing those files??? + */ + if self.is_encrypted(){ + println!("Figure out if we can support encryption"); + return Err(ALZExtractError{}); + } + + match self._compression_method { + ALZ_COMP_NOCOMP=>{ + return self.extract_file_nocomp(cursor, out_dir); + } + ALZ_COMP_BZIP2=>{ + return self.extract_file_bzip2(cursor, out_dir); + } + ALZ_COMP_DEFLATE=>{ + return self.extract_file_deflate(cursor, out_dir); + } + _=>{ + assert!(false, "Unsupported compression Unimplemented"); + println!("Unsupported compression"); + return Err(ALZExtractError{}); + } + } + + //return Ok(()); + } + + fn create_directory(&mut self, out_dir: &String) -> Result<(), ALZExtractError>{ + let mut temp: String = out_dir.to_owned(); + temp.push('/'); + temp.push_str(&self._file_name.to_owned()); + temp = temp.replace("\\", "/"); + let res = create_dir_all(temp); + println!("TODO: create one function for creating directories"); + if res.is_err() { + return Err(ALZExtractError{}); + } + return Ok(()); + } +} + +/* Check for the ALZ file header. */ +fn is_alz(cursor: &mut std::io::Cursor<&Vec>) -> bool { + let ret = cursor.read_u32::(); + if ret.is_ok() { + return ALZ_FILE_HEADER == ret.unwrap(); + } + return false; +} + +fn parse_local_file_header(cursor: &mut std::io::Cursor<&Vec>, out_dir: &String) -> bool{ + + let mut local_file_header = AlzLocalFileHeader::new(); + + let res = local_file_header.parse(cursor); + if res.is_err(){ + println!("Parse ERROR: Not a local file header (2)"); + return false; + } + + if local_file_header.is_directory() { + let res2 = local_file_header.create_directory(out_dir); + if res2.is_err() { + println!("Directory creation ERROR: "); + return false; + } + } else { + /*the is_file flag doesn't appear to always be set, so we'll just assume it's a file if + * it's not marked as a directory.*/ + let res2 = local_file_header.extract_file(cursor, out_dir); + if res2.is_err() { + println!("Extract ERROR: (probably should consider changing this to a warning, and parse what we have"); + return false; + } + } + + return true; +} + + +fn parse_central_directory_header(cursor: &mut std::io::Cursor<&Vec>) -> bool{ + /* + * This is ignored in unalz (UnAlz.cpp ReadCentralDirectoryStructure). + * + * It actually reads 12 bytes, and I think it happens to work because EOF is hit on the next + * read, which it does not consider an error. + */ + let ret = cursor.read_u64::(); + return ret.is_ok(); +} + +fn process_file(bytes: &Vec, out_dir: &String) -> bool { + + let mut cursor = Cursor::new(bytes); + + if !is_alz(&mut cursor){ + println!("NOT ALZ, need to return an exit status here"); + + /*Need an exit status for wrong file type.*/ + return false; + } + cursor.read_u32::().unwrap(); //ignore results, just doing this to skip 4 bytes. + + loop { + + let ret = cursor.read_u32::(); + if ret.is_err(){ + break; + } + let sig = ret.unwrap(); + + match sig { + ALZ_LOCAL_FILE_HEADER=>{ + if parse_local_file_header(&mut cursor, out_dir){ + //println!("Found a ALZ_LOCAL_FILE_HEADER"); + continue; + } + } + ALZ_CENTRAL_DIRECTORY_HEADER=>{ + if parse_central_directory_header(&mut cursor){ + println!("Found a ALZ_CENTRAL_DIRECTORY_HEADER"); + continue; + } + } + ALZ_END_OF_CENTRAL_DIRECTORY_HEADER=>{ + println!("Found a ALZ_END_OF_CENTRAL_DIRECTORY_HEADER"); + /*This is the end, nothing really to do here.*/ + } + _ => { + println!("sig = {:x}", sig); + /*Parse error, maybe try and extract what is there???*/ + assert!(false, "NOT A VALID FILE IN MATCH"); + } + } + } + + return true; + +} + +fn main() { + let args: Vec<_> = std::env::args().collect(); + + if args.len() < 3 { + println!("Usage: {} ", args[0]); + return; + } + let file_name = &args[1]; + let out_dir = &args[2]; + + let bytes: Vec = fs::read(file_name).unwrap(); + let res = create_dir_all(out_dir); + if res.is_err() { + assert!(false, "Cannot create output directory {}", out_dir); + } + process_file(&bytes, out_dir); + +} + + + + diff --git a/CLAM-2256/unit_tests/bzip2.alz b/CLAM-2256/unit_tests/bzip2.alz new file mode 100644 index 0000000000..c3449e43fb Binary files /dev/null and b/CLAM-2256/unit_tests/bzip2.alz differ diff --git a/CLAM-2256/unit_tests/create.py b/CLAM-2256/unit_tests/create.py new file mode 100755 index 0000000000..b08b45d941 --- /dev/null +++ b/CLAM-2256/unit_tests/create.py @@ -0,0 +1,161 @@ +#!/usr/bin/python3 + +import sys, os, bz2 +import shutil +import binascii +import struct +from optparse import OptionParser + +WORKING_DIRECTORY = ".__create_tmp" + +def delWD(): + if os.path.isdir(WORKING_DIRECTORY): + shutil.rmtree(WORKING_DIRECTORY) + +def createWD(): + delWD() + os.makedirs(WORKING_DIRECTORY) + +def createInFiles(): + cwd = os.getcwd() + os.chdir(WORKING_DIRECTORY) + f = open("test.txt", "w") + f.write("test file 0") + f.close() + + for i in range(1, 5): + os.makedirs(str(i)) + f = open(os.path.join(str(i), "test.txt"), "w") + f.write(f'"test file {i}"') + f.close() + os.chdir(cwd) + +def writeFileHeader(f): + #write alz file header + f.write(struct.pack(' 0xff: + numBytes = 2 + if len(data) > 0xffff: + numBytes = 4 + if len(data) > 0xffffffff: + numBytes = 8 + + outFile.write(struct.pack(" + +int main(){ + printf("Hello World!\n"); + return 0; +} diff --git a/CLAM-2256/unit_tests/uncompressed.alz b/CLAM-2256/unit_tests/uncompressed.alz new file mode 100644 index 0000000000..5efb02264f Binary files /dev/null and b/CLAM-2256/unit_tests/uncompressed.alz differ