From fb305a709547930e4e9e409af10cb02fe8ebcb4a Mon Sep 17 00:00:00 2001 From: Anthony Chan Date: Fri, 18 Nov 2022 23:48:40 -0700 Subject: [PATCH 1/2] Defer or avoid file MD5 calculation when cache is disabled --- libclamav/cache.c | 5 ----- libclamav/scanners.c | 35 ++++++++++++++++++++++------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/libclamav/cache.c b/libclamav/cache.c index 318de52dc1..087ada292e 100644 --- a/libclamav/cache.c +++ b/libclamav/cache.c @@ -752,11 +752,6 @@ cl_error_t clean_cache_check(unsigned char *md5, size_t size, cli_ctx *ctx) return CL_VIRUS; } - if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) { - cli_dbgmsg("clean_cache_check: Caching disabled. Returning CL_VIRUS.\n"); - return CL_VIRUS; - } - ret = cache_lookup_hash(md5, size, ctx->engine->cache, ctx->recursion_level); cli_dbgmsg("clean_cache_check: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x is %s\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15], (ret == CL_VIRUS) ? "negative" : "positive"); return ret; diff --git a/libclamav/scanners.c b/libclamav/scanners.c index ed5722069a..8aab626131 100644 --- a/libclamav/scanners.c +++ b/libclamav/scanners.c @@ -4213,7 +4213,8 @@ static inline bool result_should_goto_done(cli_ctx *ctx, cl_error_t result_in, c cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) { cl_error_t ret = CL_CLEAN; - cl_error_t cache_check_result; + cl_error_t cache_check_result = CL_VIRUS; + bool cache_enabled = true; cl_error_t verdict_at_this_level; cli_file_t dettype = 0; uint8_t typercg = 1; @@ -4295,6 +4296,10 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) typercg = 0; } + if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) { + cache_enabled = false; + } + /* * Perform file typing from the start of the file. */ @@ -4398,17 +4403,19 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) } /* - * Get the maphash + * Get the maphash, if necessary */ - if (CL_SUCCESS != fmap_get_hash(ctx->fmap, &hash, CLI_HASH_MD5)) { - cli_dbgmsg("cli_magic_scan: Failed to get a hash for the current fmap.\n"); + if (cache_enabled || (SCAN_COLLECT_METADATA)) { + if (CL_SUCCESS != fmap_get_hash(ctx->fmap, &hash, CLI_HASH_MD5)) { + cli_dbgmsg("cli_magic_scan: Failed to get a hash for the current fmap.\n"); - // It may be that the file was truncated between the time we started the scan and the time we got the hash. - // Not a reason to print an error message. - ret = CL_SUCCESS; - goto done; + // It may be that the file was truncated between the time we started the scan and the time we got the hash. + // Not a reason to print an error message. + ret = CL_SUCCESS; + goto done; + } + hashed_size = ctx->fmap->len; } - hashed_size = ctx->fmap->len; ret = dispatch_file_inspection_callback(ctx->engine->cb_file_inspection, ctx, filetype); if (CL_CLEAN != ret) { @@ -4423,9 +4430,11 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) /* * Check if we've already scanned this file before. */ - perf_start(ctx, PERFT_CACHE); - cache_check_result = clean_cache_check(hash, hashed_size, ctx); - perf_stop(ctx, PERFT_CACHE); + if (cache_enabled) { + perf_start(ctx, PERFT_CACHE); + cache_check_result = clean_cache_check(hash, hashed_size, ctx); + perf_stop(ctx, PERFT_CACHE); + } if (SCAN_COLLECT_METADATA) { char hashstr[CLI_HASHLEN_MD5 * 2 + 1]; @@ -4443,7 +4452,7 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) } } - if (cache_check_result != CL_VIRUS) { + if (cache_enabled && (cache_check_result != CL_VIRUS)) { cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__); ret = CL_SUCCESS; goto early_ret; From 25dc1e99ad84849169398ede5ff1c74fb4fd41b3 Mon Sep 17 00:00:00 2001 From: Micah Snyder Date: Fri, 19 Apr 2024 16:26:00 -0400 Subject: [PATCH 2/2] Fix bug when collect-metadata is enabled and caching is disabled If SCAN_COLLECT_METADATA is enabled, and caching is disabled, we zero-out the hash after recording it. This results in a non-NULL and invalid-hash that may be passed to `cli_scan_fmap()` for the "raw mode" scan. It's an uncommon code path, but would result in comparing hash-sigs with a zeroed hash rather than the valid hash. This bug could result in a missed hash-based sig matches. There is no reason to invalidate or zero-out the hash if we happen to calculate it. We avoid the cache-lookup by checking the engine setting, not by checking if we have a hash. --- libclamav/scanners.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/libclamav/scanners.c b/libclamav/scanners.c index 8aab626131..8cc19297af 100644 --- a/libclamav/scanners.c +++ b/libclamav/scanners.c @@ -4443,9 +4443,6 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15]); ret = cli_jsonstr(ctx->wrkproperty, "FileMD5", hashstr); - if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) { - memset(hash, 0, CLI_HASHLEN_MD5); - } if (ret != CL_SUCCESS) { cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__); goto early_ret; @@ -4498,7 +4495,7 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) * If self protection mechanism enabled, do the scanraw() scan first * before extracting with a file type parser. */ - ret = scanraw(ctx, type, 0, &dettype, (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) ? NULL : hash); + ret = scanraw(ctx, type, 0, &dettype, hash); // Evaluate the result from the scan to see if it end the scan of this layer early, // and to decid if we should propagate an error or not. @@ -4935,7 +4932,7 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) /* CL_TYPE_HTML: raw HTML files are not scanned, unless safety measure activated via DCONF */ if (type != CL_TYPE_IGNORED && (type != CL_TYPE_HTML || !(SCAN_PARSE_HTML) || !(DCONF_DOC & DOC_CONF_HTML_SKIPRAW)) && !ctx->engine->sdb) { - ret = scanraw(ctx, type, typercg, &dettype, (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) ? NULL : hash); + ret = scanraw(ctx, type, typercg, &dettype, hash); // Evaluate the result from the scan to see if it end the scan of this layer early, // and to decid if we should propagate an error or not.