diff --git a/src/parser.c b/src/parser.c
index dc692b3e..dd29353a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -124,12 +124,10 @@ static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
- GUMBO_STRING(
- "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
- "extensions to HTML 4.0//"),
- GUMBO_STRING(
- "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
- "extensions to HTML 4.0//"),
+ GUMBO_STRING("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
+ "extensions to HTML 4.0//"),
+ GUMBO_STRING("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
+ "extensions to HTML 4.0//"),
GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
@@ -573,9 +571,9 @@ static GumboInsertionMode get_appropriate_insertion_mode(
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
if (node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML)
- return is_last ?
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
-
+ return is_last ? GUMBO_INSERTION_MODE_IN_BODY
+ : GUMBO_INSERTION_MODE_INITIAL;
+
switch (node->v.element.tag) {
case GUMBO_TAG_SELECT: {
if (is_last) {
@@ -972,7 +970,7 @@ static void append_comment_node(
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
static void clear_stack_to_table_row_context(GumboParser* parser) {
while (!node_tag_in_set(get_current_node(parser),
- (gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
+ (gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
pop_current_node(parser);
}
}
@@ -980,16 +978,16 @@ static void clear_stack_to_table_row_context(GumboParser* parser) {
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
static void clear_stack_to_table_context(GumboParser* parser) {
while (!node_tag_in_set(get_current_node(parser),
- (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
pop_current_node(parser);
}
}
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
void clear_stack_to_table_body_context(GumboParser* parser) {
- while (!node_tag_in_set(get_current_node(parser),
- (gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
- TAG(TEMPLATE)})) {
+ while (!node_tag_in_set(
+ get_current_node(parser), (gumbo_tagset){TAG(HTML), TAG(TBODY),
+ TAG(TFOOT), TAG(THEAD), TAG(TEMPLATE)})) {
pop_current_node(parser);
}
}
@@ -1486,12 +1484,12 @@ static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
// This is the "generate all implied end tags thoroughly" clause of the spec.
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
- for (
- ; node_tag_in_set(get_current_node(parser),
- (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
- TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
- TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
- pop_current_node(parser))
+ for (; node_tag_in_set(get_current_node(parser),
+ (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT),
+ TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT),
+ TAG(RTC), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD),
+ TAG(TR)});
+ pop_current_node(parser))
;
}
@@ -1958,7 +1956,8 @@ static bool adoption_agency_algorithm(
if (last_node == furthest_block) {
bookmark = formatting_index + 1;
gumbo_debug("Bookmark moved to %d.\n", bookmark);
- assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
+ assert((unsigned int) bookmark <=
+ state->_active_formatting_elements.length);
}
// Step 13.9.
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
@@ -2018,7 +2017,8 @@ static bool adoption_agency_algorithm(
gumbo_vector_remove_at(
parser, formatting_node_index, &state->_active_formatting_elements);
assert(bookmark >= 0);
- assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
+ assert(
+ (unsigned int) bookmark <= state->_active_formatting_elements.length);
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
&state->_active_formatting_elements);
@@ -2386,656 +2386,799 @@ static void destroy_node(GumboParser* parser, GumboNode* node) {
static bool handle_in_body(GumboParser* parser, GumboToken* token) {
GumboParserState* state = parser->_parser_state;
assert(state->_open_elements.length > 0);
- if (token->type == GUMBO_TOKEN_NULL) {
- parser_add_parse_error(parser, token);
- ignore_token(parser);
- return false;
- } else if (token->type == GUMBO_TOKEN_WHITESPACE) {
- reconstruct_active_formatting_elements(parser);
- insert_text_token(parser, token);
- return true;
- } else if (token->type == GUMBO_TOKEN_CHARACTER ||
- token->type == GUMBO_TOKEN_CDATA) {
- reconstruct_active_formatting_elements(parser);
- insert_text_token(parser, token);
- set_frameset_not_ok(parser);
- return true;
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
- append_comment_node(parser, get_current_node(parser), token);
- return true;
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
- parser_add_parse_error(parser, token);
- ignore_token(parser);
- return false;
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
- parser_add_parse_error(parser, token);
- if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
- ignore_token(parser);
- return false;
- }
- assert(parser->_output->root != NULL);
- assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
- merge_attributes(parser, token, parser->_output->root);
- return false;
- } else if (tag_in(token, kStartTag,
- (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
- TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
- TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
- tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
- return handle_in_head(parser, token);
- } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
- parser_add_parse_error(parser, token);
- if (state->_open_elements.length < 2 ||
- !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
- has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
- ignore_token(parser);
- return false;
- }
- state->_frameset_ok = false;
- merge_attributes(parser, token, state->_open_elements.data[1]);
- return false;
- } else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
- parser_add_parse_error(parser, token);
- if (state->_open_elements.length < 2 ||
- !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
- !state->_frameset_ok) {
- ignore_token(parser);
- return false;
- }
- // Save the body node for later removal.
- GumboNode* body_node = state->_open_elements.data[1];
-
- // Pop all nodes except root HTML element.
- GumboNode* node;
- do {
- node = pop_current_node(parser);
- } while (node != state->_open_elements.data[1]);
-
- // Removing & destroying the body node is going to kill any nodes that have
- // been added to the list of active formatting elements, and so we should
- // clear it to prevent a use-after-free if the list of active formatting
- // elements is reconstructed afterwards. This may happen if whitespace
- // follows the </frameset>.
- clear_active_formatting_elements(parser);
-
- // Remove the body node. We may want to factor this out into a generic
- // helper, but right now this is the only code that needs to do this.
- GumboVector* children = &parser->_output->root->v.element.children;
- for (unsigned int i = 0; i < children->length; ++i) {
- if (children->data[i] == body_node) {
- gumbo_vector_remove_at(parser, i, children);
- break;
- }
- }
- destroy_node(parser, body_node);
-
- // Insert the <frameset>, and switch the insertion mode.
.
+ clear_active_formatting_elements(parser);
+
+ // Remove the body node. We may want to factor this out into a
+ // generic helper, but right now this is the only code that needs to
+ // do this.
+ GumboVector* children = &parser->_output->root->v.element.children;
+ for (unsigned int i = 0; i < children->length; ++i) {
+ if (children->data[i] == body_node) {
+ gumbo_vector_remove_at(parser, i, children);
+ break;
+ }
+ }
+ destroy_node(parser, body_node);
+
+ // Insert the