From 8a308944e46f8c2aa054005d5aed89f2711f9c1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Tue, 6 Apr 2021 18:19:25 +0200 Subject: [PATCH] publisher: Skip script, pre and textarea content when looking for HTML elements Updates #7567 --- publisher/htmlElementsCollector.go | 87 ++++++++++++++++--------- publisher/htmlElementsCollector_test.go | 6 +- 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go index 1823a832776..d9479aafaa5 100644 --- a/publisher/htmlElementsCollector.go +++ b/publisher/htmlElementsCollector.go @@ -64,7 +64,7 @@ type cssClassCollectorWriter struct { buff bytes.Buffer isCollecting bool - dropValue bool + inPreTag string inQuote bool quoteValue byte @@ -90,49 +90,58 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) { b := p[i] w.toggleIfQuote(b) if !w.inQuote && b == '>' { - w.endCollecting(false) + w.endCollecting() break } w.buff.WriteByte(b) } if !w.isCollecting { - if w.dropValue { - w.buff.Reset() - } else { - // First check if we have processed this element before. - w.collector.mu.RLock() - - // See https://github.com/dominikh/go-tools/issues/723 - //lint:ignore S1030 This construct avoids memory allocation for the string. - seen := w.collector.elementSet[string(w.buff.Bytes())] - w.collector.mu.RUnlock() - if seen { - w.buff.Reset() - continue + if w.inPreTag != "" { + s := w.buff.String() + if tagName, isEnd := w.parseEndTag(s); isEnd && w.inPreTag == tagName { + w.inPreTag = "" } + w.buff.Reset() + continue + } - s := w.buff.String() + // First check if we have processed this element before. + w.collector.mu.RLock() + // See https://github.com/dominikh/go-tools/issues/723 + //lint:ignore S1030 This construct avoids memory allocation for the string. + seen := w.collector.elementSet[string(w.buff.Bytes())] + w.collector.mu.RUnlock() + if seen { w.buff.Reset() + continue + } - if strings.HasPrefix(s, ". @@ -154,15 +168,24 @@ func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, s return newv, strings.ToLower(tag) } -func (c *cssClassCollectorWriter) endCollecting(drop bool) { +func (c *cssClassCollectorWriter) parseEndTag(s string) (string, bool) { + if !strings.HasPrefix(s, "") + return strings.ToLower(strings.TrimSpace(s)), true +} + +func (c *cssClassCollectorWriter) endCollecting() { c.isCollecting = false c.inQuote = false - c.dropValue = drop + } func (c *cssClassCollectorWriter) startCollecting() { c.isCollecting = true - c.dropValue = false + } func (c *cssClassCollectorWriter) toggleIfQuote(b byte) { diff --git a/publisher/htmlElementsCollector_test.go b/publisher/htmlElementsCollector_test.go index 2c2fd373306..5a1802234b4 100644 --- a/publisher/htmlElementsCollector_test.go +++ b/publisher/htmlElementsCollector_test.go @@ -89,8 +89,12 @@ func TestClassCollector(t *testing.T) { {"Alpine transition 1", `
`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")}, {"Vue bind", `
`, f("div", "active", "")}, - // https://github.com/gohugoio/hugo/issues/7746 + // Issue #7746 {"Apostrophe inside attribute value", `my text
`, f("a div", "missingclass", "")}, + // Issue #7567 + {"Script tags content should be skipped", `
`, f("div script", "foo", "")}, + {"Pre tags content should be skipped", `
foobar
`, f("div pre", "foo preclass", "")}, + {"Textare tags content should be skipped", `
`, f("div textarea", "foo textareaclass", "")}, } { c.Run(test.name, func(c *qt.C) { w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())