From 6a8e227333e8ec809c0b4f08cebe846ff0046200 Mon Sep 17 00:00:00 2001 From: Ollrogge Date: Sat, 13 Jan 2024 19:46:13 +0100 Subject: [PATCH] save work --- Userland/Utilities/CMakeLists.txt | 1 + Userland/Utilities/xxd.cpp | 302 +++++++++++++++++++++++++----- 2 files changed, 259 insertions(+), 44 deletions(-) diff --git a/Userland/Utilities/CMakeLists.txt b/Userland/Utilities/CMakeLists.txt index 947c8f8ca5b79f..2ed93c9e520204 100644 --- a/Userland/Utilities/CMakeLists.txt +++ b/Userland/Utilities/CMakeLists.txt @@ -158,6 +158,7 @@ target_link_libraries(wasm PRIVATE LibFileSystem LibJS LibLine LibWasm) target_link_libraries(watch PRIVATE LibFileSystem) target_link_libraries(wsctl PRIVATE LibGUI LibIPC) target_link_libraries(xml PRIVATE LibFileSystem LibXML) +target_link_libraries(xxd PRIVATE LibUnicode ) target_link_libraries(xzcat PRIVATE LibCompress) target_link_libraries(zip PRIVATE LibArchive LibFileSystem) diff --git a/Userland/Utilities/xxd.cpp b/Userland/Utilities/xxd.cpp index afd94e808e36cd..d0f5436d1535c0 100644 --- a/Userland/Utilities/xxd.cpp +++ b/Userland/Utilities/xxd.cpp @@ -1,54 +1,138 @@ #include #include +#include +#include +#include #include #include #include #include -#include -static constexpr size_t COLS_AMT = 16; -static constexpr size_t COLS_AMT_C = 12; +static constexpr size_t BYTES_PER_LINE_HEX = 16; +static constexpr size_t BYTES_PER_LINE_C = 12; +static constexpr size_t BYTES_PER_LINE_BITS = 6; +static constexpr size_t BYTES_PER_LINE_PLAIN_HEX = 30; -enum class State { - Default, +static constexpr size_t GROUP_SIZE_HEX = 2; +static constexpr size_t GROUP_SIZE_HEX_LITTLE_ENDIAN = 4; +static constexpr size_t GROUP_SIZE_BITS = 1; +static constexpr size_t GROUP_SIZE_PLAIN_HEX = 0; + +enum class DisplayStyle { + Hex, + PlainHex, + HexLittleEndian, CStyle, + Bits }; -static void -print_line(Bytes line, size_t offset, size_t cols_amt_setting) +static void print_ascii(Bytes line) { - out("{:08}: ", offset); - for (size_t i = 0; i < cols_amt_setting; i += 2) { - if (i + 1 < line.size()) { - auto chunk = line.slice(i, 2); - out("{:02x}{:02x} ", chunk[0], chunk[1]); - } else if (i < line.size()) { - out("{:02x} ", line[i]); + for (auto const& byte : line) { + if (is_ascii_printable(byte)) { + putchar(byte); } else { - out(" "); + putchar('.'); } } +} - out(" "); +static void print_line_hex(Bytes line, size_t line_length, size_t group_size, bool uppercase) +{ + for (size_t i = 0; i < line_length; ++i) { + if (i < line.size()) { + if (uppercase) { + out("{:02X}", line[i]); + } else { - for (auto const& byte : line) { - if (is_ascii_printable(byte)) { - putchar(byte); + out("{:02x}", line[i]); + } } else { - putchar('.'); + out(" "); + } + + if (group_size != 0 && (i + 1) % group_size == 0) { + out(" "); } } + out(" "); + print_ascii(line); putchar('\n'); } -static void print_line_c_style(Bytes line) +static void print_line_little_endian_hex(Bytes line, size_t line_length, size_t group_size, bool uppercase) { + if (group_size < 2) { + print_line_hex(line, line_length, group_size, uppercase); + return; + } + + size_t padding = (line_length - line.size()) / group_size; + line_length -= padding; + + for (size_t i = 0; i < line_length; i += group_size) { + size_t size = i + group_size < line.size() ? group_size : line.size() - i; + auto group = line.slice(i, size); + if (size < group_size) { + for (size_t i = 0; i < group_size - size; ++i) { + out(" "); + } + } + for (ssize_t i = group.size() - 1; i >= 0; --i) { + if (uppercase) { + out("{:02X}", group[i]); + } else { + + out("{:02x}", group[i]); + } + } + out(" "); + } + + for (size_t i = 0; i < padding; ++i) { + for (size_t i = 0; i < group_size; ++i) { + out(" "); + } + } + out(" "); - for (size_t i = 0; i < line.size(); ++i) { - out("0x{:02x}, ", line[i]); + print_ascii(line); + putchar('\n'); +} + +static void print_line_bits(Bytes line, size_t line_length, size_t group_size) +{ + auto print_byte = [](u8 byte) { + for (ssize_t i = 7; i >= 0; --i) { + out("{}", (byte >> i) & 1 ? '1' : '0'); + } + }; + + for (size_t i = 0; i < line_length; ++i) { + if (i < line.size()) { + print_byte(line[i]); + } else { + out(" "); + } + + if (group_size > 0 && (i + 1) % group_size == 0) { + out(" "); + } } + out(" "); + print_ascii(line); + putchar('\n'); +} + +static void print_line_c_style(Bytes line) +{ + out(" "); + for (size_t i = 0; i < line.size() - 1; ++i) { + out("0x{:02x}, ", line[i]); + } + out("0x{:02x}", line[line.size() - 1]); putchar('\n'); } @@ -59,53 +143,162 @@ static ErrorOr path_to_variable_name(StringView path) work = work.replace("."sv, "_"sv, ReplaceMode::All); work = work.replace("/"sv, "_"sv, ReplaceMode::All); - return String::from_byte_string(work); + return TRY(String::from_byte_string(work)); } ErrorOr serenity_main(Main::Arguments args) { + // todo: pledge + Core::ArgsParser args_parser; StringView path; + StringView seek_offset_setting; bool autoskip = false; bool c_include_file_style = false; + bool capitalize_c_include_file_style = false; + bool binary_digit_formatting = false; + bool little_endian_hexdump = false; + bool offset_in_decimal = false; + bool plain_hexdump_style = false; + bool uppercase_hex = false; + Optional line_length_setting; + Optional group_size_setting; + Optional read_limit_setting; + Optional added_file_offset_setting; + String c_include_file_style_variable_name; args_parser.add_positional_argument(path, "Input file", "input", Core::ArgsParser::Required::No); args_parser.add_option(autoskip, "A single '*' replaces NUL-lines.", "autoskip", 'a'); args_parser.add_option(c_include_file_style, "Output in C include file style.", "include", 'i'); + args_parser.add_option(capitalize_c_include_file_style, "Capitalize C include file style (-i).", "capitalize", 'C'); + args_parser.add_option(binary_digit_formatting, "Binary digit formatting", "bits", 'b'); + args_parser.add_option(plain_hexdump_style, "Plain hex dump style", "plain", 'p'); + args_parser.add_option(offset_in_decimal, "Show offset in decimal instead of hex", "decimal", 'd'); + args_parser.add_option(line_length_setting, "Octets per line", "cols", 'c', "cols"); + args_parser.add_option(little_endian_hexdump, "Little-endian hex dump", nullptr, 'e'); + args_parser.add_option(uppercase_hex, "Use upper case hexh letters", "uppercase", 'u'); + args_parser.add_option(group_size_setting, "Octet group size", "groupsize", 'g', "groupsize"); + args_parser.add_option(read_limit_setting, "Stop after writing octets", "len", 'l', "octet_length"); + args_parser.add_option(c_include_file_style_variable_name, "Set the variable name used in C include ouput (-i)", "name", 'n', "include_style"); + args_parser.add_option(added_file_offset_setting, "Add to displayed file offset", "offset", 'o', "offset"); + args_parser.add_option(seek_offset_setting, "start at bytes absolute infile offset", "[-]seek", 's', "seek"); args_parser.parse(args); + outln("Test: {} {}", c_include_file_style_variable_name, c_include_file_style_variable_name.is_empty()); + + // todo: TRY vs MUST + auto file = TRY(Core::File::open_file_or_standard_stream(path, Core::File::OpenMode::Read)); + auto file_size = TRY(file->size()); - Array contents; - Bytes bytes; - size_t total_bytes_read = 0x0; - auto state = State::Default; - size_t cols_amt_setting = COLS_AMT; + size_t read_limit = SIZE_MAX; + auto display_style = DisplayStyle::Hex; + size_t line_length = BYTES_PER_LINE_HEX; + size_t group_size = GROUP_SIZE_HEX; + size_t added_file_offset = 0; + Optional seek_offset; + + // todo: put states that can appear together in an if, else if block + // todo: how to handle invalid combinations ? + // lazy approach ? if (c_include_file_style) { - cols_amt_setting = COLS_AMT_C; - state = State::CStyle; - auto variable_name = TRY(path_to_variable_name(path)); - outln("u8 {}[] = {{", variable_name); + line_length = BYTES_PER_LINE_C; + display_style = DisplayStyle::CStyle; + + if (c_include_file_style_variable_name.is_empty()) { + c_include_file_style_variable_name = TRY(path_to_variable_name(path)); + } + + if (capitalize_c_include_file_style) { + c_include_file_style_variable_name = TRY(c_include_file_style_variable_name.to_uppercase()); + } + + outln("u8 {}[] = {{", c_include_file_style_variable_name); + } + + if (little_endian_hexdump) { + display_style = DisplayStyle::HexLittleEndian; + group_size = GROUP_SIZE_HEX_LITTLE_ENDIAN; + } + + if (plain_hexdump_style) { + display_style = DisplayStyle::PlainHex; + line_length = BYTES_PER_LINE_PLAIN_HEX; + group_size = GROUP_SIZE_PLAIN_HEX; + } + + if (line_length_setting.has_value()) { + line_length = line_length_setting.value(); + if (line_length > 256 || line_length == 0) { + outln("Invalid number of columns (max is 256)."); + return 1; + } + } + + if (binary_digit_formatting) { + if (c_include_file_style || plain_hexdump_style) { + outln("-ps and -i are incompatible with -b"); + return 1; + } + + display_style = DisplayStyle::Bits; + group_size = GROUP_SIZE_BITS; + line_length = BYTES_PER_LINE_BITS; + } + + if (group_size_setting.has_value()) { + group_size = group_size_setting.value(); + + if (little_endian_hexdump) { + if (!is_power_of_two(group_size)) { + outln("Group size must be a power of 2 with -e"); + return 1; + } + } + } + + if (read_limit_setting.has_value()) { + read_limit = read_limit_setting.value(); } - const size_t max_read_size = contents.size() - contents.size() % cols_amt_setting; + if (added_file_offset_setting.has_value()) { + added_file_offset = added_file_offset_setting.value(); + } + + // TODO: seek relative to current stdin file position + if (!seek_offset_setting.is_null()) { + seek_offset = AK::StringUtils::convert_to_int(seek_offset_setting); + } + Array contents; + Bytes bytes; + size_t total_bytes_read = 0x0; + const size_t max_read_size = contents.size() - (contents.size() % line_length); bool is_input_remaining = true; + + if (seek_offset.has_value()) { + auto offset = seek_offset.value(); + total_bytes_read = offset < 0 ? file_size - offset : offset; + } + while (is_input_remaining) { auto bytes_to_read = max_read_size - bytes.size(); bytes = contents.span().slice(0, bytes_to_read); bytes = TRY(file->read_some(bytes)); - total_bytes_read += bytes.size(); - if (bytes.size() < bytes_to_read) { is_input_remaining = false; } while (bytes.size() > 0) { - auto line_len = bytes.size() > cols_amt_setting ? cols_amt_setting : bytes.size(); + auto line_len = bytes.size() > line_length ? line_length : bytes.size(); + + if (total_bytes_read + line_len > read_limit) { + line_len = read_limit - total_bytes_read; + } + auto current_line = bytes.slice(0, line_len); bytes = bytes.slice(line_len); @@ -114,21 +307,42 @@ ErrorOr serenity_main(Main::Arguments args) continue; } - switch (state) { - case State::Default: - print_line(current_line, total_bytes_read, cols_amt_setting); + if (display_style != DisplayStyle::CStyle && display_style != DisplayStyle::PlainHex) { + if (offset_in_decimal) { + out("{:08}: ", total_bytes_read + added_file_offset); + } else { + out("{:08x}: ", total_bytes_read + added_file_offset); + } + } + + switch (display_style) { + case DisplayStyle::PlainHex: + case DisplayStyle::Hex: + print_line_hex(current_line, line_length, group_size, uppercase_hex); break; - case State::CStyle: + case DisplayStyle::HexLittleEndian: + print_line_little_endian_hex(current_line, line_length, group_size, uppercase_hex); + break; + case DisplayStyle::Bits: + print_line_bits(current_line, line_length, group_size); + break; + case DisplayStyle::CStyle: print_line_c_style(current_line); break; } + + total_bytes_read += line_len; + + if (total_bytes_read >= read_limit) { + is_input_remaining = false; + break; + } } } - if (state == State::CStyle) { + if (display_style == DisplayStyle::CStyle) { outln("}};"); - auto variable_name = TRY(path_to_variable_name(path)); - outln("size_t {}_len = {};", variable_name, total_bytes_read); + outln("size_t {}_len = {};", c_include_file_style_variable_name, total_bytes_read); } return 0;