Skip to content

Commit

Permalink
blah
Browse files Browse the repository at this point in the history
  • Loading branch information
ragusaa committed Jun 18, 2024
1 parent 033088e commit 638344d
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 64 deletions.
53 changes: 0 additions & 53 deletions libclamav/htmlnorm.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,6 @@ static inline void html_output_c(file_buff_t *fbuff1, unsigned char c)

static void html_output_str(file_buff_t *fbuff, const unsigned char *str, size_t len)
{
//fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, str);
if (fbuff) {
if ((fbuff->length + len) >= HTML_FILE_BUFF_LEN) {
html_output_flush(fbuff);
Expand Down Expand Up @@ -492,17 +491,6 @@ static inline void html_tag_contents_append(struct tag_contents *cont, const uns
if (!begin || !end)
return;

fprintf(stderr, "%s::%d::", __FUNCTION__, __LINE__);
{
const unsigned char * idx = begin;
while (idx != end){
fprintf(stderr, "%c", *idx);
idx++;
}
fprintf(stderr, "\n");

}

for (i = cont->pos; i < MAX_TAG_CONTENTS_LENGTH && (begin < end); i++) {
uint8_t c = *begin++;
if (mbchar && (c < 0x80 || mbchar >= 0x10000)) {
Expand Down Expand Up @@ -662,37 +650,6 @@ static void js_process(struct parser_state *js_state, const unsigned char *js_be
}
}

/*ANDY
*
typedef struct tag_arguments_tag {
int count;
int scanContents;
unsigned char **tag;
unsigned char **value;
unsigned char **contents;
} tag_arguments_t;
*
* */
/**
 * Return a printable C string for a possibly-NULL byte string, so that it can
 * be handed to a "%s" conversion without invoking undefined behaviour.
 */
static const char *dumpTagArguments_str(const unsigned char *s)
{
    return s ? (const char *)s : "(null)";
}

/**
 * Debug helper: dump the contents of a tag_arguments_t to stderr.
 *
 * Prints the address of the structure, its count / scanContents fields, the
 * three array pointers, and then one line per tag, value and (when the array
 * is present) contents entry. Every line is prefixed with the function name
 * and source line number.
 *
 * @param tagArgs  Structure to dump. May be NULL, in which case only the
 *                 header and the LEAVING trailer are printed.
 * @param varname  Label printed in the header lines to identify which
 *                 variable of the caller is being dumped.
 */
static void dumpTagArguments(tag_arguments_t *tagArgs, const char *const varname)
{
    int i;

    /* "%p" requires a void pointer argument. */
    fprintf(stderr, "%s::%d::%s::%p\n", __FUNCTION__, __LINE__, varname, (void *)tagArgs);
    if (tagArgs) {
        fprintf(stderr, "%s::%d::%s::%d::%d\n", __FUNCTION__, __LINE__, varname, tagArgs->count, tagArgs->scanContents);
        fprintf(stderr, "%s::%d::%s::%p::%p::%p\n", __FUNCTION__, __LINE__, varname,
                (void *)tagArgs->tag, (void *)tagArgs->value, (void *)tagArgs->contents);

        for (i = 0; i < tagArgs->count; i++) {
            /* Guard both the arrays and the individual entries: a
             * zero-initialised structure has NULL arrays, and an entry
             * itself may be NULL. */
            fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__,
                    dumpTagArguments_str(tagArgs->tag ? tagArgs->tag[i] : NULL));
            fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__,
                    dumpTagArguments_str(tagArgs->value ? tagArgs->value[i] : NULL));
        }

        /* scanContents is used by callers as an on/off flag, not as an
         * element count, so it must not bound the loop over contents.
         * NOTE(review): assumes contents, when non-NULL, holds one entry per
         * tag (i.e. `count` entries) -- confirm against the code that
         * populates it. */
        if (tagArgs->contents) {
            for (i = 0; i < tagArgs->count; i++) {
                fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__,
                        dumpTagArguments_str(tagArgs->contents[i]));
            }
        }
    }

    fprintf(stderr, "%s::%d::LEAVING\n", __FUNCTION__, __LINE__);
}

static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs, const struct cli_dconf *dconf)
{
int fd_tmp, tag_length = 0, tag_arg_length = 0;
Expand Down Expand Up @@ -818,7 +775,6 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha
while (*ptr && isspace(*ptr)) {
ptr++;
}
// fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptr);
while (*ptr) {
if (!binary && *ptr == '\n') {
/* Convert it to a space and re-process */
Expand Down Expand Up @@ -894,8 +850,6 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha
case HTML_NORM:
if (*ptr == '<') {
ptrend = ptr; /* for use by scanContents */
fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptrend);
//this is the only place I am seeing them;
html_output_c(file_buff_o2, '<');
if (in_tag == TAG_DONT_EXTRACT && !text_space_written) {
html_output_c(file_buff_text, ' ');
Expand All @@ -904,7 +858,6 @@ fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptrend);
if (hrefs && hrefs->scanContents && in_ahref && href_contents_begin) {
/*append this text portion to the contents of <a>*/
html_tag_contents_append(&contents, href_contents_begin, ptr);
fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptr);
href_contents_begin = NULL; /*We just encountered another tag inside <a>, so skip it*/
}
ptr++;
Expand Down Expand Up @@ -1321,7 +1274,6 @@ fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptrend);
href_contents_begin = ptr;
if (strcmp(tag, "a") == 0) {
arg_value = html_tag_arg_value(&tag_args, "href");
fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, arg_value);
if (arg_value && strlen((const char *)arg_value) > 0) {
if (hrefs->scanContents) {
char *arg_value_title = html_tag_arg_value(&tag_args, "title");
Expand Down Expand Up @@ -1433,8 +1385,6 @@ fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptrend);
/* a/img tags for buff_text can be processed only if we're not processing hrefs */
arg_value = html_tag_arg_value(&tag_args, "href");
if (arg_value && arg_value[0]) {
fprintf(stderr, "%s::%d::tag = '%s'\n", __FUNCTION__, __LINE__, tag);
fprintf(stderr, "%s::%d::URL = '%s'\n", __FUNCTION__, __LINE__, arg_value);
html_output_str(file_buff_text, (const unsigned char *)arg_value, strlen((const char *)arg_value));
html_output_c(file_buff_text, ' ');
text_space_written = true;
Expand Down Expand Up @@ -1982,9 +1932,6 @@ fprintf(stderr, "%s::%d::'%s'\n", __FUNCTION__, __LINE__, ptrend);
js_state = NULL;
}

//dumpTagArguments(&tag_args, "tag_args");
dumpTagArguments(hrefs, "hrefs");

html_tag_arg_free(&tag_args);
if (!m_area) {
fclose(stream_in);
Expand Down
14 changes: 3 additions & 11 deletions libclamav/scanners.c
Original file line number Diff line number Diff line change
Expand Up @@ -2122,7 +2122,7 @@ static void save_urls(cli_ctx * ctx, tag_arguments_t * hrefs) {
}

for (i = 0; i < hrefs->count; i++){
if (is_url(hrefs->value[i])) {
if (is_url((const char *) hrefs->value[i])) {
haveOne = true;
break;
}
Expand All @@ -2135,8 +2135,8 @@ static void save_urls(cli_ctx * ctx, tag_arguments_t * hrefs) {
json_object *ary = cli_jsonarray(ctx->wrkproperty, HTML_URLS_JSON_KEY );
if (ary) {
for (i = 0; i < hrefs->count; i++){
if (is_url(hrefs->value[i])){
cli_jsonstr(ary, NULL, hrefs->value[i]);
if (is_url((const char *) hrefs->value[i])){
cli_jsonstr(ary, NULL, (const char *) hrefs->value[i]);
}
}
} else {
Expand Down Expand Up @@ -2178,18 +2178,10 @@ static cl_error_t cli_scanhtml(cli_ctx *ctx)

/* Output JSON Summary Information */
if (STORE_HTML_URLS && SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
size_t idx;
fprintf(stderr, "%s::%d::Check option here\n", __FUNCTION__, __LINE__);

fprintf(stderr, "%s::%d::Calling html_normalise\n", __FUNCTION__, __LINE__);
tag_arguments_t hrefs = {0};
hrefs.scanContents = 1;
(void)html_normalise_map(ctx, map, tempname, &hrefs, ctx->dconf);

fprintf(stderr, "%s::%d::Store all this stuff\n", __FUNCTION__, __LINE__);

save_urls(ctx, &hrefs);

} else {
(void)html_normalise_map(ctx, map, tempname, NULL, ctx->dconf);
}
Expand Down

0 comments on commit 638344d

Please sign in to comment.