diff --git a/libclamav/pdf.c b/libclamav/pdf.c index 1c442e1889..ff6223082e 100644 --- a/libclamav/pdf.c +++ b/libclamav/pdf.c @@ -1040,15 +1040,13 @@ static size_t find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const cha #define DUMP_MASK ((1 << OBJ_CONTENTS) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_A85) | (1 << OBJ_EMBEDDED_FILE) | (1 << OBJ_JAVASCRIPT) | (1 << OBJ_OPENACTION) | (1 << OBJ_LAUNCHACTION)) -static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, int dumpid) +static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd) { int ret; struct cli_bc_ctx *bc_ctx; cli_ctx *ctx = NULL; fmap_t *map; - UNUSEDPARAM(dumpid); - if (NULL == pdf) return CL_EARG; @@ -1387,7 +1385,7 @@ static void process(struct text_norm_state *s, enum cstate *st, const char *buf, } while (length > 0); } -static int pdf_scan_contents(int fd, struct pdf_struct *pdf) +static int pdf_scan_contents(int fd, struct pdf_struct *pdf, struct pdf_obj *obj) { struct text_norm_state s; char fullname[1024]; @@ -1398,7 +1396,7 @@ static int pdf_scan_contents(int fd, struct pdf_struct *pdf) cl_error_t rc; enum cstate st = CSTATE_NONE; - snprintf(fullname, sizeof(fullname), "%s" PATHSEP "pdf%02u_c", pdf->dir, (pdf->files - 1)); + snprintf(fullname, sizeof(fullname), "%s" PATHSEP "pdf obj %d %d contents", pdf->dir, obj->id >> 8, obj->id & 0xff); fout = open(fullname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600); if (fout < 0) { char err[128]; @@ -1481,7 +1479,7 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t cli_dbgmsg("pdf_extract_obj: dumping obj %u %u\n", obj->id >> 8, obj->id & 0xff); - snprintf(fullname, sizeof(fullname), "%s" PATHSEP "pdf%02u", pdf->dir, pdf->files++); + snprintf(fullname, sizeof(fullname), "%s" PATHSEP "pdf obj %d %d", pdf->dir, obj->id >> 8, obj->id & 0xff); fout = open(fullname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 
0600); if (fout < 0) { char err[128]; @@ -1839,12 +1837,7 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t } if ((rc == CL_CLEAN) || (rc == CL_VIRUS)) { - unsigned int dumpid = 0; - for (dumpid = 0; dumpid < pdf->nobjs; dumpid++) { - if (pdf->objs[dumpid] == obj) - break; - } - rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, dumpid); + rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout); if (rc2 == CL_VIRUS) { rc = rc2; goto really_done; @@ -1855,7 +1848,7 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t lseek(fout, 0, SEEK_SET); cli_dbgmsg("pdf_extract_obj: dumping contents from obj %u %u\n", obj->id >> 8, obj->id & 0xff); - rc2 = pdf_scan_contents(fout, pdf); + rc2 = pdf_scan_contents(fout, pdf, obj); if (rc2 != CL_SUCCESS) { rc = rc2; goto really_done; @@ -2832,7 +2825,19 @@ static void dbg_printhex(const char *msg, const char *hex, unsigned len) } } -static void compute_hash_r6(const char *password, size_t pwlen, const unsigned char salt[16], unsigned char hash[32]) +/** + * @brief Compute the hash of the password concatenated with the 8-byte validation or key salt and (for owner-password checks) the U string. + * + * Some details and comments for how to compute this hash come from the PyPDF project: + * https://github.com/py-pdf/pypdf/blob/3.17.4/pypdf/_encryption.py#L568 + * + * @param password The password to hash. + * @param pwlen The length of the password. + * @param salt The validation salt or key salt; only the first 8 bytes are used. + * @param hash The resulting 32-byte hash. + * @param U [Optional] The 48-byte U string (for owner-password checks); NULL for user-password checks. 
+ */ +static void compute_hash_r6(const char *password, size_t pwlen, const unsigned char salt[16], unsigned char hash[32], const char *U) { unsigned char data[(128 + 64 + 48) * 64]; unsigned char block[64]; @@ -2841,14 +2846,33 @@ static void compute_hash_r6(const char *password, size_t pwlen, const unsigned c int32_t i, j, sum; uint8_t sha256[32], sha384[48], sha512[64]; + /* + * Compute a SHA-256 hash of the UTF-8 password concatenated with the 8 bytes of the owner or user validation salt. + */ memcpy(data, password, pwlen); memcpy(data + pwlen, salt, 8); - cl_sha256(data, pwlen + 8, block, NULL); + + if (NULL != U) { + // If it's for the owner password check, we also concatenate the 48-byte U string. + memcpy(data + pwlen + 8, U, 48); + + cl_sha256(data, pwlen + 8 + 48, block, NULL); + } else { + cl_sha256(data, pwlen + 8, block, NULL); + } for (i = 0; i < 64 || i < (data[(in_data_len * 64) - 1] + 32); i++) { memcpy(data, password, pwlen); memcpy(data + pwlen, block, block_size); + in_data_len = pwlen + block_size; + + if (NULL != U) { + // If it's for the owner password check, we also concatenate the 48-byte U string. + memcpy(data + pwlen + block_size, U, 48); + in_data_len += 48; + } + for (j = 1; j < 64; j++) memcpy(data + j * in_data_len, data, in_data_len); @@ -2879,184 +2903,325 @@ static void compute_hash_r6(const char *password, size_t pwlen, const unsigned c memcpy(hash, block, 32); } -static void check_user_password(struct pdf_struct *pdf, int R, const char *O, - const char *U, int32_t P, int EM, - const char *UE, size_t UE_len, - unsigned length, unsigned oulen) +/** + * @brief Check if the owner password matches an empty password. + * + * Will set the DECRYPTABLE_PDF flag if the owner password is empty. + * Will also set the key and keylen fields in the pdf_struct. 
+ * + * Some details and comments for how to check the owner password come from the PyPDF project: + * https://github.com/py-pdf/pypdf/blob/3.17.4/pypdf/_encryption.py#L397 + * + * @param pdf The PDF context. + * @param R The security handler revision (the /R value). + * @param O The /O string. + * @param U The /U string. + * @param OE The /OE string. + * @param OE_len The length of the /OE string. + */ +static void check_owner_password(struct pdf_struct *pdf, int R, + const char *O, const char *U, + const char *OE, size_t OE_len) { - unsigned i; - uint8_t result[16]; - char data[32]; - struct arc4_state arc4; - unsigned password_empty = 0; - - UNUSEDPARAM(oulen); + bool password_empty = false; dbg_printhex("U: ", U, 32); dbg_printhex("O: ", O, 32); - if (R == 5) { - uint8_t result2[32]; - /* supplement to ISO3200, 3.5.2 Algorithm 3.11 */ - /* user validation salt */ - cl_sha256(U + 32, 8, result2, NULL); - dbg_printhex("Computed U", (const char *)result2, 32); - if (!memcmp(result2, U, 32)) { - /* Algorithm 3.2a could be used to recover encryption key */ - cl_sha256(U + 40, 8, result2, NULL); + switch (R) { + case 6: { + unsigned char hash[32], validationkey[32]; - if (UE_len != 32) { - cli_dbgmsg("check_user_password: UE length is not 32: %zu\n", UE_len); - noisy_warnmsg("check_user_password: UE length is not 32: %zu\n", UE_len); - } else { - pdf->keylen = 32; - pdf->key = cli_malloc(pdf->keylen); - if (!pdf->key) { - cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n"); - return; - } + size_t pwlen = 0; + char password[] = ""; - aes_256cbc_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)result2, 32, 0); - dbg_printhex("check_user_password: Candidate encryption key", pdf->key, pdf->keylen); - - password_empty = 1; + if (NULL == OE) { + cli_dbgmsg("check_owner_password: Missing OE value!\n"); + noisy_warnmsg("check_owner_password: Missing OE value!\n"); + goto done; } - } - } else if (R == 6) { - unsigned char hash[32], 
validationkey[32]; - size_t pwlen = 0; - char password[] = ""; + dbg_printhex("OE: ", OE, OE_len); - if (NULL == UE) { - cli_dbgmsg("check_user_password: Missing UE value!\n"); - noisy_warnmsg("check_user_password: Missing UE value!\n"); - return; - } - - compute_hash_r6(password, pwlen, (const unsigned char *)(U + 32), validationkey); - if (!memcmp(U, validationkey, sizeof(validationkey))) { - - compute_hash_r6(password, pwlen, (const unsigned char *)(U + 40), hash); + /* + * Test the password against the owner key by computing the SHA-256 hash of the UTF-8 password concatenated + * with the 8 bytes of owner validation salt, concatenated with the 48-byte U string. + */ + compute_hash_r6( + password, + pwlen, + (const unsigned char *)(O + 32), // owner validation salt + validationkey, + U); + + /* If the 32-byte result matches the first 32 bytes of the O string, this is the owner password. */ + if (0 != memcmp(O, validationkey, sizeof(validationkey))) { + cli_dbgmsg("check_owner_password: Owner password check did not match!\n"); + break; + } - if (UE_len != 32) { - cli_dbgmsg("check_user_password: UE length is not 32: %zu\n", UE_len); - noisy_warnmsg("check_user_password: UE length is not 32: %zu\n", UE_len); + /* + * Compute an intermediate owner key by computing the SHA-256 hash of the UTF-8 password concatenated with + * the 8 bytes of owner key salt, concatenated with the 48-byte U string. 
+ */ + compute_hash_r6( + password, + pwlen, + (const unsigned char *)(O + 40), // owner key salt + hash, + U); + + if (OE_len != 32) { + cli_dbgmsg("check_owner_password: OE length is not 32: %zu\n", OE_len); + noisy_warnmsg("check_owner_password: OE length is not 32: %zu\n", OE_len); } else { pdf->keylen = 32; pdf->key = cli_malloc(pdf->keylen); if (!pdf->key) { - cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n"); - return; + cli_errmsg("check_owner_password: Cannot allocate memory for pdf->key\n"); + goto done; } - aes_256cbc_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)hash, 32, 0); - dbg_printhex("check_user_password: Candidate encryption key", pdf->key, pdf->keylen); + aes_256cbc_decrypt((const unsigned char *)OE, &OE_len, (unsigned char *)(pdf->key), (char *)hash, 32, 0); + dbg_printhex("check_owner_password: Candidate encryption key", pdf->key, pdf->keylen); - password_empty = 1; + password_empty = true; } + + break; + } + default: { + cli_dbgmsg("check_owner_password: Unknown or unsupported encryption version. R: %d\n", R); + noisy_warnmsg("check_owner_password: Unknown or unsupported encryption version. R: %d\n", R); } - } else if ((R >= 2) && (R <= 4)) { - unsigned char *d; - size_t sz = 68 + pdf->fileIDlen + (R >= 4 && !EM ? 4 : 0); - d = calloc(1, sz); + } - if (!(d)) - return; + if (password_empty) { + /* The key we computed above is the key used to encrypt the streams. 
We could decrypt it now if we wanted to */ + pdf->flags |= 1 << DECRYPTABLE_PDF; - memcpy(d, key_padding, 32); - memcpy(d + 32, O, 32); - P = le32_to_host(P); - memcpy(d + 64, &P, 4); - memcpy(d + 68, pdf->fileID, pdf->fileIDlen); - - /* 7.6.3.3 Algorithm 2 */ - /* empty password, password == padding */ - if (R >= 4 && !EM) { - uint32_t v = 0xFFFFFFFF; - memcpy(d + 68 + pdf->fileIDlen, &v, 4); - } + cli_dbgmsg("check_owner_password: encrypted PDF found, owner password is empty, will attempt to decrypt\n"); + noisy_msg(pdf, "check_owner_password: encrypted PDF found, owner password is empty, will attempt to decrypt\n"); + } else { + /* The key is not valid, we would need the user or the owner password to decrypt */ + cli_dbgmsg("check_owner_password: encrypted PDF found but cannot decrypt with empty owner password\n"); + noisy_warnmsg("check_owner_password: encrypted PDF found but cannot decrypt with empty owner password\n"); + } - cl_hash_data("md5", d, sz, result, NULL); - free(d); - if (length > 128) - length = 128; - if (R >= 3) { - /* Yes, this really is on purpose */ - for (i = 0; i < 50; i++) - cl_hash_data("md5", result, length / 8, result, NULL); - } - if (R == 2) - length = 40; +done: - pdf->keylen = length / 8; - pdf->key = cli_malloc(pdf->keylen); - if (!pdf->key) - return; + return; +} - memcpy(pdf->key, result, pdf->keylen); - dbg_printhex("md5", (const char *)result, 16); - dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen); +static void check_user_password(struct pdf_struct *pdf, int R, const char *O, + const char *U, int32_t P, int EM, + const char *UE, size_t UE_len, + unsigned length) +{ + unsigned i; + uint8_t result[16]; + char data[32]; + struct arc4_state arc4; + bool password_empty = false; - /* 7.6.3.3 Algorithm 6 */ - if (R == 2) { - /* 7.6.3.3 Algorithm 4 */ - memcpy(data, key_padding, 32); - if (false == arc4_init(&arc4, (const uint8_t *)(pdf->key), pdf->keylen)) { - noisy_warnmsg("check_user_password: failed to init 
arc4\n"); - return; - } - arc4_apply(&arc4, (uint8_t *)data, 32); - dbg_printhex("computed U (R2)", data, 32); - if (!memcmp(data, U, 32)) - password_empty = 1; - } else if (R >= 3) { - unsigned len = pdf->keylen; + dbg_printhex("U: ", U, 32); + dbg_printhex("O: ", O, 32); + + switch (R) { + case 2: + case 3: + case 4: { unsigned char *d; + size_t sz = 68 + pdf->fileIDlen + (R >= 4 && !EM ? 4 : 0); d = calloc(1, sz); - d = calloc(1, 32 + pdf->fileIDlen); if (!(d)) - return; + goto done; - /* 7.6.3.3 Algorithm 5 */ memcpy(d, key_padding, 32); - memcpy(d + 32, pdf->fileID, pdf->fileIDlen); - cl_hash_data("md5", d, 32 + pdf->fileIDlen, result, NULL); - memcpy(data, pdf->key, len); + memcpy(d + 32, O, 32); + P = le32_to_host(P); + memcpy(d + 64, &P, 4); + memcpy(d + 68, pdf->fileID, pdf->fileIDlen); + + /* 7.6.3.3 Algorithm 2 */ + /* empty password, password == padding */ + if (R >= 4 && !EM) { + uint32_t v = 0xFFFFFFFF; + memcpy(d + 68 + pdf->fileIDlen, &v, 4); + } - if (false == arc4_init(&arc4, (const uint8_t *)data, len)) { - noisy_warnmsg("check_user_password: failed to init arc4\n"); - return; + cl_hash_data("md5", d, sz, result, NULL); + free(d); + if (length > 128) + length = 128; + if (R >= 3) { + /* Yes, this really is on purpose */ + for (i = 0; i < 50; i++) + cl_hash_data("md5", result, length / 8, result, NULL); } - arc4_apply(&arc4, result, 16); - for (i = 1; i <= 19; i++) { - unsigned j; + if (R == 2) + length = 40; + + pdf->keylen = length / 8; + pdf->key = cli_malloc(pdf->keylen); + if (!pdf->key) + goto done; + + memcpy(pdf->key, result, pdf->keylen); + dbg_printhex("md5", (const char *)result, 16); + dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen); + + /* 7.6.3.3 Algorithm 6 */ + if (R == 2) { + /* 7.6.3.3 Algorithm 4 */ + memcpy(data, key_padding, 32); + if (false == arc4_init(&arc4, (const uint8_t *)(pdf->key), pdf->keylen)) { + noisy_warnmsg("check_user_password: failed to init arc4\n"); + goto done; + } + arc4_apply(&arc4, 
(uint8_t *)data, 32); + dbg_printhex("computed U (R2)", data, 32); + if (!memcmp(data, U, 32)) + password_empty = true; + } else { + // R is 3 or 4 + unsigned len = pdf->keylen; + unsigned char *d; + + d = calloc(1, 32 + pdf->fileIDlen); + if (!(d)) + goto done; - for (j = 0; j < len; j++) - data[j] = pdf->key[j] ^ i; + /* 7.6.3.3 Algorithm 5 */ + memcpy(d, key_padding, 32); + memcpy(d + 32, pdf->fileID, pdf->fileIDlen); + cl_hash_data("md5", d, 32 + pdf->fileIDlen, result, NULL); + memcpy(data, pdf->key, len); if (false == arc4_init(&arc4, (const uint8_t *)data, len)) { noisy_warnmsg("check_user_password: failed to init arc4\n"); - return; + goto done; } arc4_apply(&arc4, result, 16); + for (i = 1; i <= 19; i++) { + unsigned j; + + for (j = 0; j < len; j++) + data[j] = pdf->key[j] ^ i; + + if (false == arc4_init(&arc4, (const uint8_t *)data, len)) { + noisy_warnmsg("check_user_password: failed to init arc4\n"); + goto done; + } + arc4_apply(&arc4, result, 16); + } + + dbg_printhex("fileID", pdf->fileID, pdf->fileIDlen); + dbg_printhex("computed U (R>=3)", (const char *)result, 16); + if (!memcmp(result, U, 16)) + password_empty = true; + free(d); } - dbg_printhex("fileID", pdf->fileID, pdf->fileIDlen); - dbg_printhex("computed U (R>=3)", (const char *)result, 16); - if (!memcmp(result, U, 16)) - password_empty = 1; - free(d); - } else { - cli_dbgmsg("check_user_password: invalid revision %d\n", R); - noisy_warnmsg("check_user_password: invalid revision %d\n", R); + break; } - } else { - /* Supported R is in {2,3,4,5} */ - cli_dbgmsg("check_user_password: R value out of range\n"); - noisy_warnmsg("check_user_password: R value out of range\n"); + case 5: { + uint8_t result2[32]; + + /* supplement to ISO3200, 3.5.2 Algorithm 3.11 */ + /* user validation salt */ + cl_sha256(U + 32, 8, result2, NULL); + dbg_printhex("Computed U", (const char *)result2, 32); + if (!memcmp(result2, U, 32)) { + /* Algorithm 3.2a could be used to recover encryption key */ + cl_sha256(U + 
40, 8, result2, NULL); + + if (UE_len != 32) { + cli_dbgmsg("check_user_password: UE length is not 32: %zu\n", UE_len); + noisy_warnmsg("check_user_password: UE length is not 32: %zu\n", UE_len); + } else { + pdf->keylen = 32; + pdf->key = cli_malloc(pdf->keylen); + if (!pdf->key) { + cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n"); + goto done; + } - return; + aes_256cbc_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)result2, 32, 0); + dbg_printhex("check_user_password: Candidate encryption key", pdf->key, pdf->keylen); + + password_empty = true; + } + } + + break; + } + case 6: { + unsigned char hash[32], validationkey[32]; + + size_t pwlen = 0; + char password[] = ""; + + if (NULL == UE) { + cli_dbgmsg("check_user_password: Missing UE value!\n"); + noisy_warnmsg("check_user_password: Missing UE value!\n"); + goto done; + } + + dbg_printhex("UE: ", UE, UE_len); + + /* + * Test the password against the user key by computing the SHA-256 hash of the UTF-8 password concatenated + * with the 8 bytes of user validation salt. + */ + compute_hash_r6( + password, + pwlen, + (const unsigned char *)(U + 32), // user validation salt + validationkey, + NULL); // no U string for user password check + + /* If the 32-byte result matches the first 32 bytes of the U string, this is the user password. */ + if (0 != memcmp(U, validationkey, sizeof(validationkey))) { + cli_dbgmsg("check_user_password: User password check did not match!\n"); + break; + } + + /* + * Compute an intermediate user key by computing the SHA-256 hash of the UTF-8 password concatenated with + * the 8 bytes of user key salt. 
+ */ + compute_hash_r6( + password, + pwlen, + (const unsigned char *)(U + 40), // user key salt + hash, + NULL); // no U string for user password check + + if (UE_len != 32) { + cli_dbgmsg("check_user_password: UE length is not 32: %zu\n", UE_len); + noisy_warnmsg("check_user_password: UE length is not 32: %zu\n", UE_len); + } else { + pdf->keylen = 32; + pdf->key = cli_malloc(pdf->keylen); + if (!pdf->key) { + cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n"); + goto done; + } + + aes_256cbc_decrypt((const unsigned char *)UE, &UE_len, (unsigned char *)(pdf->key), (char *)hash, 32, 0); + dbg_printhex("check_user_password: Candidate encryption key", pdf->key, pdf->keylen); + + password_empty = true; + } + + break; + } + default: { + /* Supported R is in {2,3,4,5,6} */ + cli_dbgmsg("check_user_password: R value out of range\n"); + noisy_warnmsg("check_user_password: R value out of range\n"); + } } if (password_empty) { @@ -3070,6 +3235,9 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O, cli_dbgmsg("check_user_password: user/owner password would be required for decryption\n"); noisy_warnmsg("check_user_password: encrypted PDF found, user password is NOT empty, cannot decrypt!\n"); } + +done: + return; } enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def) @@ -3110,8 +3278,19 @@ void pdf_handle_enc(struct pdf_struct *pdf) { struct pdf_obj *obj; uint32_t len, n, R, P, length, EM = 1, i, oulen; - char *O, *U, *UE, *StmF, *StrF, *EFF; + + char *O = NULL; + char *OE = NULL; + size_t OE_len = 0; + + char *U = NULL; + char *UE = NULL; size_t UE_len = 0; + + char *StmF = NULL; + char *StrF = NULL; + char *EFF = NULL; + const char *q, *q2; if (pdf->enc_objid == ~0u) @@ -3135,158 +3314,185 @@ void pdf_handle_enc(struct pdf_struct *pdf) : (const char *)(obj->start + pdf->map); O = U = UE = StmF = StrF = EFF = NULL; - do { - pdf->enc_method_string = ENC_UNKNOWN; - 
pdf->enc_method_stream = ENC_UNKNOWN; - pdf->enc_method_embeddedfile = ENC_UNKNOWN; + pdf->enc_method_string = ENC_UNKNOWN; + pdf->enc_method_stream = ENC_UNKNOWN; + pdf->enc_method_embeddedfile = ENC_UNKNOWN; - q2 = cli_memstr(q, len, "/Standard", 9); - if (!q2) { - cli_dbgmsg("pdf_handle_enc: /Standard not found\n"); - noisy_warnmsg("pdf_handle_enc: /Standard not found\n"); - break; - } + q2 = cli_memstr(q, len, "/Standard", 9); + if (!q2) { + cli_dbgmsg("pdf_handle_enc: /Standard not found\n"); + noisy_warnmsg("pdf_handle_enc: /Standard not found\n"); + goto done; + } - /* we can have both of these: - * /AESV2/Length /Standard/Length - * /Length /Standard - * make sure we don't mistake AES's length for Standard's */ - length = pdf_readint(q2, len - (q2 - q), "/Length"); - if (length == ~0u) - length = pdf_readint(q, len, "/Length"); - - if (length < 40) { - cli_dbgmsg("pdf_handle_enc: invalid length: %d\n", length); - length = 40; - } + /* we can have both of these: + * /AESV2/Length /Standard/Length + * /Length /Standard + * make sure we don't mistake AES's length for Standard's */ + length = pdf_readint(q2, len - (q2 - q), "/Length"); + if (length == ~0u) + length = pdf_readint(q, len, "/Length"); - R = pdf_readint(q, len, "/R"); - if (R == ~0u) { - cli_dbgmsg("pdf_handle_enc: invalid R\n"); - noisy_warnmsg("pdf_handle_enc: invalid R\n"); - break; - } + if (length < 40) { + cli_dbgmsg("pdf_handle_enc: invalid length: %d\n", length); + length = 40; + } - if ((R > 6) || (R < 2)) { - cli_dbgmsg("pdf_handle_enc: R value outside supported range [2..6]\n"); - noisy_warnmsg("pdf_handle_enc: R value outside supported range [2..6]\n"); - break; - } + R = pdf_readint(q, len, "/R"); + if (R == ~0u) { + cli_dbgmsg("pdf_handle_enc: invalid R\n"); + noisy_warnmsg("pdf_handle_enc: invalid R\n"); + goto done; + } - P = pdf_readint(q, len, "/P"); - if (R < 6) { // P field doesn't seem to be required for R6. 
- if (P == ~0u) { - cli_dbgmsg("pdf_handle_enc: invalid P\n"); - noisy_warnmsg("pdf_handle_enc: invalid P\n"); - break; - } + if ((R > 6) || (R < 2)) { + cli_dbgmsg("pdf_handle_enc: R value outside supported range [2..6]\n"); + noisy_warnmsg("pdf_handle_enc: R value outside supported range [2..6]\n"); + goto done; + } + + P = pdf_readint(q, len, "/P"); + if (R < 6) { // P field doesn't seem to be required for R6. + if (P == ~0u) { + cli_dbgmsg("pdf_handle_enc: invalid P\n"); + noisy_warnmsg("pdf_handle_enc: invalid P\n"); + goto done; } + } - if (R < 5) - oulen = 32; - else - oulen = 48; - - if (R == 2 || R == 3) { - pdf->enc_method_stream = ENC_V2; - pdf->enc_method_string = ENC_V2; - pdf->enc_method_embeddedfile = ENC_V2; - } else if (R == 4 || R == 5 || R == 6) { - EM = pdf_readbool(q, len, "/EncryptMetadata", 1); - StmF = pdf_readval(q, len, "/StmF"); - StrF = pdf_readval(q, len, "/StrF"); - EFF = pdf_readval(q, len, "/EFF"); - n = len; - pdf->CF = pdf_getdict(q, (int *)(&n), "/CF"); - pdf->CF_n = n; - - if (StmF) - cli_dbgmsg("pdf_handle_enc: StmF: %s\n", StmF); - if (StrF) - cli_dbgmsg("pdf_handle_enc: StrF: %s\n", StrF); - if (EFF) - cli_dbgmsg("pdf_handle_enc: EFF: %s\n", EFF); - - pdf->enc_method_stream = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY); - pdf->enc_method_string = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY); - pdf->enc_method_embeddedfile = parse_enc_method(pdf->CF, n, EFF, pdf->enc_method_stream); - - free(StmF); - free(StrF); - free(EFF); - - cli_dbgmsg("pdf_handle_enc: EncryptMetadata: %s\n", EM ? 
"true" : "false"); - - if (R == 4) { - length = 128; - } else { - n = 0; - UE = pdf_readstring(q, len, "/UE", &n, NULL, false); - UE_len = n; - length = 256; - } + if (R < 5) { + oulen = 32; + } else { + oulen = 48; + } + + if (R == 2 || R == 3) { + pdf->enc_method_stream = ENC_V2; + pdf->enc_method_string = ENC_V2; + pdf->enc_method_embeddedfile = ENC_V2; + } else if (R == 4 || R == 5 || R == 6) { + EM = pdf_readbool(q, len, "/EncryptMetadata", 1); + StmF = pdf_readval(q, len, "/StmF"); + StrF = pdf_readval(q, len, "/StrF"); + EFF = pdf_readval(q, len, "/EFF"); + n = len; + pdf->CF = pdf_getdict(q, (int *)(&n), "/CF"); + pdf->CF_n = n; + + if (StmF) { + cli_dbgmsg("pdf_handle_enc: StmF: %s\n", StmF); + } + if (StrF) { + cli_dbgmsg("pdf_handle_enc: StrF: %s\n", StrF); + } + if (EFF) { + cli_dbgmsg("pdf_handle_enc: EFF: %s\n", EFF); } - if (length == ~0u) - length = 40; + pdf->enc_method_stream = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY); + pdf->enc_method_string = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY); + pdf->enc_method_embeddedfile = parse_enc_method(pdf->CF, n, EFF, pdf->enc_method_stream); - n = 0; - O = pdf_readstring(q, len, "/O", &n, NULL, false); - if (!O || n < oulen) { - cli_dbgmsg("pdf_handle_enc: invalid O: %d\n", n); - cli_dbgmsg("pdf_handle_enc: invalid O: %d\n", n); - if (O) - dbg_printhex("invalid O", O, n); + cli_dbgmsg("pdf_handle_enc: EncryptMetadata: %s\n", EM ? 
"true" : "false"); - break; + if (R == 4) { + length = 128; + } else { + length = 256; + + /* + * Read the UE value (for checking user-password) + */ + n = 0; + UE = pdf_readstring(q, len, "/UE", &n, NULL, false); + UE_len = n; + + /* + * Read the OE value (for checking owner-password) + */ + n = 0; + OE = pdf_readstring(q, len, "/OE", &n, NULL, false); + OE_len = n; } - if (n > oulen) { - for (i = oulen; i < n; i++) - if (O[i]) - break; + } + + if (length == ~0u) + length = 40; - if (i != n) { + /* + * Read the O value + */ + n = 0; + O = pdf_readstring(q, len, "/O", &n, NULL, false); + if (!O || n < oulen) { + cli_dbgmsg("pdf_handle_enc: invalid O: %d\n", n); + noisy_warnmsg("pdf_handle_enc: invalid O: %d\n", n); + if (O) { + dbg_printhex("invalid O", O, n); + } + + goto done; + } + if (n > oulen) { + for (i = oulen; i < n; i++) { + if (O[i]) { dbg_printhex("pdf_handle_enc: too long O", O, n); noisy_warnmsg("pdf_handle_enc: too long O: %u", n); - break; + goto done; } } + } - n = 0; - U = pdf_readstring(q, len, "/U", &n, NULL, false); - if (!U || n < oulen) { - cli_dbgmsg("pdf_handle_enc: invalid U: %u\n", n); - noisy_warnmsg("pdf_handle_enc: invalid U: %u\n", n); - - if (U) - dbg_printhex("invalid U", U, n); - - break; + /* + * Read the U value + */ + n = 0; + U = pdf_readstring(q, len, "/U", &n, NULL, false); + if (!U || n < oulen) { + cli_dbgmsg("pdf_handle_enc: invalid U: %u\n", n); + noisy_warnmsg("pdf_handle_enc: invalid U: %u\n", n); + if (U) { + dbg_printhex("invalid U", U, n); } - if (n > oulen) { - for (i = oulen; i < n; i++) - if (U[i]) - break; - if (i != n) { + goto done; + } + + if (n > oulen) { + for (i = oulen; i < n; i++) { + if (U[i]) { dbg_printhex("too long U", U, n); - break; + goto done; } } + } - cli_dbgmsg("pdf_handle_enc: Encrypt R: %d, P %x, length: %u\n", R, P, length); - if (length % 8) { - cli_dbgmsg("pdf_handle_enc: wrong key length, not multiple of 8\n"); - noisy_warnmsg("pdf_handle_enc: wrong key length, not multiple of 8\n"); - 
break; - } - check_user_password(pdf, R, O, U, P, EM, UE, UE_len, length, oulen); - } while (0); + cli_dbgmsg("pdf_handle_enc: Encrypt R: %d, P %x, length: %u\n", R, P, length); + if (length % 8) { + cli_dbgmsg("pdf_handle_enc: wrong key length, not multiple of 8\n"); + noisy_warnmsg("pdf_handle_enc: wrong key length, not multiple of 8\n"); + goto done; + } + + // Check the owner password. + check_owner_password(pdf, R, O, U, OE, OE_len); + + if (NULL == pdf->key) { + // Wasn't the owner password, let's try the user password. + check_user_password(pdf, R, O, U, P, EM, UE, UE_len, length); + } +done: free(O); + free(OE); + free(U); free(UE); + + free(StmF); + free(StrF); + free(EFF); } /** @@ -3431,7 +3637,7 @@ static cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf) } if (CL_SUCCESS == status) { - status = run_pdf_hooks(pdf, PDF_PHASE_PARSED, -1, -1); + status = run_pdf_hooks(pdf, PDF_PHASE_PARSED, -1); cli_dbgmsg("pdf_find_and_extract_objs: (parsed hooks) returned %d\n", status); } @@ -3667,7 +3873,7 @@ cl_error_t cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) pdf.startoff = offset; - rc = run_pdf_hooks(&pdf, PDF_PHASE_PRE, -1, -1); + rc = run_pdf_hooks(&pdf, PDF_PHASE_PRE, -1); if (CL_SUCCESS != rc) { cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc); @@ -3696,7 +3902,7 @@ cl_error_t cli_pdf(const char *dir, cli_ctx *ctx, off_t offset) if (pdf.flags && CL_SUCCESS == rc) { cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags); - rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1); + rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1); if (CL_SUCCESS == rc && SCAN_HEURISTICS && (ctx->dconf->other & OTHER_CONF_PDFNAMEOBJ)) { if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) { diff --git a/libclamav/pdfdecode.c b/libclamav/pdfdecode.c index 16feb108e4..960e103d7b 100644 --- a/libclamav/pdfdecode.c +++ b/libclamav/pdfdecode.c @@ -78,7 +78,6 @@ struct pdf_token { }; static size_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict 
*params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm); -static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, uint32_t lvl); static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); static cl_error_t filter_rldecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token); @@ -338,13 +337,6 @@ static size_t pdf_decodestream_internal( break; } token->success++; - - /* Dump the stream content to a text file if keeptmp is enabled. */ - if (pdf->ctx->engine->keeptmp) { - if (CL_SUCCESS != pdf_decode_dump(pdf, obj, token, i + 1)) { - cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to temp file\n"); - } - } } if ((token->success > 0) && (NULL != token->content)) { @@ -399,45 +391,6 @@ static size_t pdf_decodestream_internal( return bytes_scanned; } -/** - * @brief Dump PDF filter content such as stream contents to a temp file. - * - * Temp file is created in the pdf->dir directory. - * Filename format is "pdffiles-1>_". - * - * @param pdf Pdf context structure. - * @param obj The object we found the filter content in. - * @param token The struct for the filter contents. - * @param lvl A unique index to distinguish the files from each other. - * @return cl_error_t - */ -static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, uint32_t lvl) -{ - char fname[1024]; - int ifd; - - snprintf(fname, sizeof(fname), "%s" PATHSEP "pdf%02u_%02u", pdf->dir, (pdf->files - 1), lvl); - ifd = open(fname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600); - if (ifd < 0) { - char err[128]; - - cli_errmsg("cli_pdf: can't create intermediate temporary file %s: %s\n", fname, cli_strerror(errno, err, sizeof(err))); - return CL_ETMPFILE; - } - - cli_dbgmsg("cli_pdf: decoded filter %u obj %u %u\n", lvl, obj->id >> 8, obj->id & 0xff); - cli_dbgmsg(" ... 
to %s\n", fname); - - if (cli_writen(ifd, token->content, token->length) != token->length) { - cli_errmsg("cli_pdf: failed to write output file\n"); - close(ifd); - return CL_EWRITE; - } - - close(ifd); - return CL_SUCCESS; -} - /* * ascii85 inflation * See http://www.piclist.com/techref/method/encode.htm (look for base85) diff --git a/libclamav/pdfng.c b/libclamav/pdfng.c index 1e655350ce..6bd6538fd7 100644 --- a/libclamav/pdfng.c +++ b/libclamav/pdfng.c @@ -229,14 +229,72 @@ int is_object_reference(char *begin, char **endchar, uint32_t *id) static char *pdf_decrypt_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t *length) { enum enc_method enc; + const char *hex = NULL; + const char *bin = NULL; + char *decoded_bin = NULL; + char *dec = NULL; + size_t bin_length; /* handled only once in cli_pdf() */ // pdf_handle_enc(pdf); if (pdf->flags & (1 << DECRYPTABLE_PDF)) { + int hex2str_ret; + bool hex_encoded_binary = false; + enc = get_enc_method(pdf, obj); - return decrypt_any(pdf, obj->id, in, length, enc); + + // Strip off the leading `<` and trailing `>` + const char *start = in; + if (start[0] == '<') { + start++; + hex_encoded_binary = true; + } + const char *end = in + *length; + if (end[-1] == '>') { + end--; + } + + *length = (end - start); + + if (hex_encoded_binary) { + hex = start; + bin_length = *length / 2; + + // Convert the hex string to binary + decoded_bin = cli_calloc(1, bin_length); + if (!decoded_bin) { + return NULL; + } + + hex2str_ret = cli_hex2str_to(hex, decoded_bin, *length); + if (hex2str_ret != 0) { + cli_dbgmsg("pdf_decrypt_string: cli_hex2str_to() failed\n"); + goto done; + } + + bin = decoded_bin; + } else { + // Binary is just embedded directly in the file, no encoding. 
+ bin = start; + bin_length = *length; + } + + // Decrypt the binary + dec = decrypt_any(pdf, obj->id, bin, &bin_length, enc); + if (!dec) { + cli_dbgmsg("pdf_decrypt_string: decrypt_any() failed\n"); + goto done; + } + + *length = bin_length; } - return NULL; + +done: + if (NULL != decoded_bin) { + free(decoded_bin); + } + + return dec; } char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len)