From 44832fc1b6dc0ccb7e612c474f9925f88180552d Mon Sep 17 00:00:00 2001 From: Andy Ragusa Date: Mon, 24 Jun 2024 14:16:39 -0700 Subject: [PATCH] Added complete list of URIs Thank you Matt Jolly for the helpful comment. --- libclamav/htmlnorm.c | 3 - libclamav/others.h | 2 +- libclamav/scanners.c | 445 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 416 insertions(+), 34 deletions(-) diff --git a/libclamav/htmlnorm.c b/libclamav/htmlnorm.c index a090e731f6..edd1bc00d7 100644 --- a/libclamav/htmlnorm.c +++ b/libclamav/htmlnorm.c @@ -490,7 +490,6 @@ static inline void html_tag_contents_append(struct tag_contents *cont, const uns uint32_t mbchar = 0; if (!begin || !end) return; - for (i = cont->pos; i < MAX_TAG_CONTENTS_LENGTH && (begin < end); i++) { uint8_t c = *begin++; if (mbchar && (c < 0x80 || mbchar >= 0x10000)) { @@ -688,7 +687,6 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha uint32_t mbchar = 0; uint32_t mbchar2 = 0; - /* * Initialize stack buffers. */ @@ -1931,7 +1929,6 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha cli_js_destroy(js_state); js_state = NULL; } - html_tag_arg_free(&tag_args); if (!m_area) { fclose(stream_in); diff --git a/libclamav/others.h b/libclamav/others.h index 4ffb7d0a50..fdff4283ca 100644 --- a/libclamav/others.h +++ b/libclamav/others.h @@ -552,7 +552,7 @@ extern LIBCLAMAV_EXPORT int have_rar; #define SCAN_HEURISTICS (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS) #define SCAN_HEURISTIC_PRECEDENCE (ctx->options->general & CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE) #define SCAN_UNPRIVILEGED (ctx->options->general & CL_SCAN_GENERAL_UNPRIVILEGED) -#define STORE_HTML_URLS (ctx->options->general & CL_SCAN_STORE_HTML_URLS) +#define STORE_HTML_URLS (ctx->options->general & CL_SCAN_STORE_HTML_URLS) #define SCAN_PARSE_ARCHIVE (ctx->options->parse & CL_SCAN_PARSE_ARCHIVE) #define SCAN_PARSE_ELF (ctx->options->parse & CL_SCAN_PARSE_ELF) diff --git a/libclamav/scanners.c b/libclamav/scanners.c index d9a577f02b..b4c47df156 100644 --- a/libclamav/scanners.c +++ b/libclamav/scanners.c @@ -2082,32 +2082,418 @@ static cl_error_t cli_ole2_tempdir_scan_for_xlm_and_images(const char *dir, cli_ return ret; } -const char * const HTML_URLS_JSON_KEY = "HTMLUrls"; - - - -static bool is_url(const char * const str){ +const char *const HTML_URLS_JSON_KEY = "HTMLUrls"; +/* https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml */ +/* clang-format off */ +const char * URI_LIST[] = { + "aaa://" + , "aaas://" + , "about://" + , "acap://" + , "acct://" + , "acd://" + , "acr://" + , "adiumxtra://" + , "adt://" + , "afp://" + , "afs://" + , "aim://" + , "amss://" + , "android://" + , "appdata://" + , "apt://" + , "ar://" + , "ark://" + , "at://" + , "attachment://" + , "aw://" + , "barion://" + , "bb://" + , "beshare://" + , "bitcoin://" + , "bitcoincash://" + , "blob://" + , "bolo://" + , "brid://" + , "browserext://" + , "cabal://" + , "calculator://" + , "callto://" + , "cap://" + , "cast://" + , "casts://" + , "chrome://" + , "chrome-extension://" + , "cid://" + , "coap://" + , "coap+tcp://" + , "coap+ws://" + , "coaps://" + , "coaps+tcp://" + , "coaps+ws://" + , "com-eventbrite-attendee://" + , "content://" + , "content-type://" + , "crid://" + , "cstr://" + , "cvs://" + , "dab://" + , "dat://" + , "data://" + , "dav://" + , "dhttp://" + , "diaspora://" + , "dict://" + , "did://" + , "dis://" + , "dlna-playcontainer://" + , "dlna-playsingle://" + , "dns://" + , "dntp://" + , "doi://" + , "dpp://" + , "drm://" + , "drop://" + , "dtmi://" + , "dtn://" + , "dvb://" + , "dvx://" + , "dweb://" + , "ed2k://" + , "eid://" + , "elsi://" + , "embedded://" + , "ens://" + , "ethereum://" + , "example://" + , "facetime://" + , "fax://" + , "feed://" + , "feedready://" + , "fido://" + , "file://" + , "filesystem://" + , "finger://" + , "first-run-pen-experience://" + , "fish://" + , "fm://" + , "ftp://" + , "fuchsia-pkg://" + , "geo://" + , "gg://" + , "git://" + , "gitoid://" + , "gizmoproject://" + , "go://" + , "gopher://" + , "graph://" + , "grd://" + , "gtalk://" + , "h323://" + , "ham://" + , "hcap://" + , "hcp://" + , "hs20://" + , "http://" + , "https://" + , "hxxp://" + , "hxxps://" + , "hydrazone://" + , "hyper://" + , "iax://" + , "icap://" + , "icon://" + , "im://" + , "imap://" + , "info://" + , "iotdisco://" + , "ipfs://" + , "ipn://" + , "ipns://" + , "ipp://" + , "ipps://" + , "irc://" + , "irc6://" + , "ircs://" + , "iris://" + , "iris.beep://" + , "iris.lwz://" + , "iris.xpc://" + , "iris.xpcs://" + , "isostore://" + , "itms://" + , "jabber://" + , "jar://" + , "jms://" + , "keyparc://" + , "lastfm://" + , "lbry://" + , "ldap://" + , "ldaps://" + , "leaptofrogans://" + , "lid://" + , "lorawan://" + , "lpa://" + , "lvlt://" + , "machineProvisioningProgressReporter://" + , "magnet://" + , "mailserver://" + , "mailto://" + , "maps://" + , "market://" + , "matrix://" + , "message://" + , "microsoft.windows.camera://" + , "microsoft.windows.camera.multipicker://" + , "microsoft.windows.camera.picker://" + , "mid://" + , "mms://" + , "modem://" + , "mongodb://" + , "moz://" + , "ms-access://" + , "ms-appinstaller://" + , "ms-browser-extension://" + , "ms-calculator://" + , "ms-drive-to://" + , "ms-enrollment://" + , "ms-excel://" + , "ms-eyecontrolspeech://" + , "ms-gamebarservices://" + , "ms-gamingoverlay://" + , "ms-getoffice://" + , "ms-help://" + , "ms-infopath://" + , "ms-inputapp://" + , "ms-launchremotedesktop://" + , "ms-lockscreencomponent-config://" + , "ms-media-stream-id://" + , "ms-meetnow://" + , "ms-mixedrealitycapture://" + , "ms-mobileplans://" + , "ms-newsandinterests://" + , "ms-officeapp://" + , "ms-people://" + , "ms-project://" + , "ms-powerpoint://" + , "ms-publisher://" + , "ms-recall://" + , "ms-remotedesktop://" + , "ms-remotedesktop-launch://" + , "ms-restoretabcompanion://" + , "ms-screenclip://" + , "ms-screensketch://" + , "ms-search://" + , "ms-search-repair://" + , "ms-secondary-screen-controller://" + , "ms-secondary-screen-setup://" + , "ms-settings://" + , "ms-settings-airplanemode://" + , "ms-settings-bluetooth://" + , "ms-settings-camera://" + , "ms-settings-cellular://" + , "ms-settings-cloudstorage://" + , "ms-settings-connectabledevices://" + , "ms-settings-displays-topology://" + , "ms-settings-emailandaccounts://" + , "ms-settings-language://" + , "ms-settings-location://" + , "ms-settings-lock://" + , "ms-settings-nfctransactions://" + , "ms-settings-notifications://" + , "ms-settings-power://" + , "ms-settings-privacy://" + , "ms-settings-proximity://" + , "ms-settings-screenrotation://" + , "ms-settings-wifi://" + , "ms-settings-workplace://" + , "ms-spd://" + , "ms-stickers://" + , "ms-sttoverlay://" + , "ms-transit-to://" + , "ms-useractivityset://" + , "ms-virtualtouchpad://" + , "ms-visio://" + , "ms-walk-to://" + , "ms-whiteboard://" + , "ms-whiteboard-cmd://" + , "ms-word://" + , "msnim://" + , "msrp://" + , "msrps://" + , "mss://" + , "mt://" + , "mtqp://" + , "mumble://" + , "mupdate://" + , "mvn://" + , "mvrp://" + , "mvrps://" + , "news://" + , "nfs://" + , "ni://" + , "nih://" + , "nntp://" + , "notes://" + , "num://" + , "ocf://" + , "oid://" + , "onenote://" + , "onenote-cmd://" + , "opaquelocktoken://" + , "openid://" + , "openpgp4fpr://" + , "otpauth://" + , "p1://" + , "pack://" + , "palm://" + , "paparazzi://" + , "payment://" + , "payto://" + , "pkcs11://" + , "platform://" + , "pop://" + , "pres://" + , "prospero://" + , "proxy://" + , "pwid://" + , "psyc://" + , "pttp://" + , "qb://" + , "query://" + , "quic-transport://" + , "redis://" + , "rediss://" + , "reload://" + , "res://" + , "resource://" + , "rmi://" + , "rsync://" + , "rtmfp://" + , "rtmp://" + , "rtsp://" + , "rtsps://" + , "rtspu://" + , "sarif://" + , "secondlife://" + , "secret-token://" + , "service://" + , "session://" + , "sftp://" + , "sgn://" + , "shc://" + , "shttp://" + , "sieve://" + , "simpleledger://" + , "simplex://" + , "sip://" + , "sips://" + , "skype://" + , "smb://" + , "smp://" + , "sms://" + , "smtp://" + , "snews://" + , "snmp://" + , "soap.beep://" + , "soap.beeps://" + , "soldat://" + , "spiffe://" + , "spotify://" + , "ssb://" + , "ssh://" + , "starknet://" + , "steam://" + , "stun://" + , "stuns://" + , "submit://" + , "svn://" + , "swh://" + , "swid://" + , "swidpath://" + , "tag://" + , "taler://" + , "teamspeak://" + , "tel://" + , "teliaeid://" + , "telnet://" + , "tftp://" + , "things://" + , "thismessage://" + , "tip://" + , "tn3270://" + , "tool://" + , "turn://" + , "turns://" + , "tv://" + , "udp://" + , "unreal://" + , "upt://" + , "urn://" + , "ut2004://" + , "uuid-in-package://" + , "v-event://" + , "vemmi://" + , "ventrilo://" + , "ves://" + , "videotex://" + , "vnc://" + , "view-source://" + , "vscode://" + , "vscode-insiders://" + , "vsls://" + , "w3://" + , "wais://" + , "web3://" + , "wcr://" + , "webcal://" + , "web+ap://" + , "wifi://" + , "wpid://" + , "ws://" + , "wss://" + , "wtai://" + , "wyciwyg://" + , "xcon://" + , "xcon-userid://" + , "xfire://" + , "xmlrpc.beep://" + , "xmlrpc.beeps://" + , "xmpp://" + , "xftp://" + , "xrcp://" + , "xri://" + , "ymsgr://" + , "z39.50://" + , "z39.50r://" + , "z39.50s://" +}; +/* clang-format on */ + +static bool is_url(const char *const str) +{ -#define MATCH(str, prefix) \ - do { \ - if (str && (strlen(str) > strlen(prefix)) \ - && (0 == strncasecmp(str, prefix, strlen(prefix)))) { \ - bRet = true; \ - goto done; \ - } \ +#define MATCH(str, prefix) \ + do { \ + if (str && (strlen(str) > strlen(prefix)) && (0 == strncasecmp(str, prefix, strlen(prefix)))) { \ + bRet = true; \ + goto done; \ + } \ } while (0); bool bRet = false; + size_t i; - MATCH(str, "https://"); - MATCH(str, "http://"); - MATCH(str, "ftp://"); + for (i = 0; i < sizeof(URI_LIST) / sizeof(URI_LIST[0]); i++) { + MATCH(str, URI_LIST[i]); + } done: return bRet; #undef MATCH } -static void save_urls(cli_ctx * ctx, tag_arguments_t * hrefs) { - int i = 0; +static void save_urls(cli_ctx *ctx, tag_arguments_t *hrefs) +{ + int i = 0; bool haveOne = false; if (NULL == hrefs) { return; @@ -2121,28 +2507,27 @@ static void save_urls(cli_ctx * ctx, tag_arguments_t * hrefs) { return; } - for (i = 0; i < hrefs->count; i++){ - if (is_url((const char *) hrefs->value[i])) { + for (i = 0; i < hrefs->count; i++) { + if (is_url((const char *)hrefs->value[i])) { haveOne = true; break; } } - - if (!haveOne){ + + if (!haveOne) { return; } - json_object *ary = cli_jsonarray(ctx->wrkproperty, HTML_URLS_JSON_KEY ); + json_object *ary = cli_jsonarray(ctx->wrkproperty, HTML_URLS_JSON_KEY); if (ary) { - for (i = 0; i < hrefs->count; i++){ - if (is_url((const char *) hrefs->value[i])){ - cli_jsonstr(ary, NULL, (const char *) hrefs->value[i]); + for (i = 0; i < hrefs->count; i++) { + if (is_url((const char *)hrefs->value[i])) { + cli_jsonstr(ary, NULL, (const char *)hrefs->value[i]); } } } else { - cli_dbgmsg("[cli_scanhtml] Failed to add \"%s\" entry JSON array\n", HTML_URLS_JSON_KEY ); + cli_dbgmsg("[cli_scanhtml] Failed to add \"%s\" entry JSON array\n", HTML_URLS_JSON_KEY); } - } static cl_error_t cli_scanhtml(cli_ctx *ctx) @@ -2179,7 +2564,7 @@ static cl_error_t cli_scanhtml(cli_ctx *ctx) /* Output JSON Summary Information */ if (STORE_HTML_URLS && SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) { tag_arguments_t hrefs = {0}; - hrefs.scanContents = 1; + hrefs.scanContents = 1; (void)html_normalise_map(ctx, map, tempname, &hrefs, ctx->dconf); save_urls(ctx, &hrefs); } else { @@ -4283,9 +4668,9 @@ static inline bool result_should_goto_done(cli_ctx *ctx, cl_error_t result_in, c cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) { - cl_error_t ret = CL_CLEAN; + cl_error_t ret = CL_CLEAN; cl_error_t cache_check_result = CL_VIRUS; - bool cache_enabled = true; + bool cache_enabled = true; cl_error_t verdict_at_this_level; cli_file_t dettype = 0; uint8_t typercg = 1;