From b1c32bb6c8340144e2a3c09755f70090a9dfc928 Mon Sep 17 00:00:00 2001 From: Sven Rebhan Date: Thu, 2 Nov 2023 16:55:56 +0100 Subject: [PATCH 1/3] Add test-case for issue #14237 --- config/internal_test.go | 23 +++++ config/testdata/envvar_comments.toml | 99 +++++++++++++++++++ config/testdata/envvar_comments_expected.toml | 99 +++++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 config/testdata/envvar_comments.toml create mode 100644 config/testdata/envvar_comments_expected.toml diff --git a/config/internal_test.go b/config/internal_test.go index b6cd9e482a550..5692cd19b6f5b 100644 --- a/config/internal_test.go +++ b/config/internal_test.go @@ -1,9 +1,12 @@ package config import ( + "bytes" "fmt" "net/http" "net/http/httptest" + "os" + "path/filepath" "testing" "time" @@ -351,6 +354,26 @@ func TestParseConfig(t *testing.T) { } } +func TestRemoveComments(t *testing.T) { + // Read expectation + expected, err := os.ReadFile(filepath.Join("testdata", "envvar_comments_expected.toml")) + require.NoError(t, err) + + // Read the file and remove the comments + buf, err := os.ReadFile(filepath.Join("testdata", "envvar_comments.toml")) + require.NoError(t, err) + removed, err := removeComments(buf) + require.NoError(t, err) + lines := bytes.Split(removed, []byte{'\n'}) + for i, line := range lines { + lines[i] = bytes.TrimRight(line, " \t") + } + actual := bytes.Join(lines, []byte{'\n'}) + + // Do the comparison + require.Equal(t, string(expected), string(actual)) +} + func TestURLRetries3Fails(t *testing.T) { httpLoadConfigRetryInterval = 0 * time.Second responseCounter := 0 diff --git a/config/testdata/envvar_comments.toml b/config/testdata/envvar_comments.toml new file mode 100644 index 0000000000000..5f35f04327c22 --- /dev/null +++ b/config/testdata/envvar_comments.toml @@ -0,0 +1,99 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply surround +# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"), +# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR}) + +[global_tags] + +[agent] +interval = "10s" +round_interval = true +metric_batch_size = 1000 +metric_buffer_limit = 10000 +collection_jitter = "0s" +flush_interval = '10s' +flush_jitter = "0s" +precision = "" +hostname = '' +omit_hostname = false + +[[outputs.influxdb]] + setting1 = '#'#test + setting2 = '''#'''#test + setting3 = "#"#test + setting4 = """#"""#test + wicked1 = "\""#test + wicked2 = """\""""#test + +[[inputs.cpu]] + percpu = true + #totalcpu = true + # collect_cpu_time = false + ## report_active = false + +[[a.plugin]] + mylist = [ + "value 1", # a good value + "value 2", # a better value + "value 3", "value 4", + 'value5', """tagwith#value""", + ] # Should work + +[[some.stuff]] + a = 'not a #comment' + b = '''not a #comment''' + c = "not a #comment" + d = """not a #comment""" + e = '''not a #comment containing "quotes"''' + f = '''not a #comment containing 'quotes'?''' + g = """not a #comment containing "quotes"?""" + +# Issue #14237 +[[inputs.myplugin]] +value = '''This isn't a #comment.''' + +[[processors.starlark]] + script = """ +# Drop fields if they contain a string. +# +# Example Input: +# measurement,host=hostname a=1,b="somestring" 1597255410000000000 +# +# Example Output: +# measurement,host=hostname a=1 1597255410000000000 + +def apply(metric): + for k, v in metric.fields.items(): + if type(v) == "string": + metric.fields.pop(k) + + return metric +""" + +[[processors.starlark]] + script = ''' +# Drop fields if they contain a string. +# +# Example Input: +# measurement,host=hostname a=1,b="somestring" 1597255410000000000 +# +# Example Output: +# measurement,host=hostname a=1 1597255410000000000 + +def apply(metric): + for k, v in metric.fields.items(): + if type(v) == "string": + metric.fields.pop(k) + + return metric +''' diff --git a/config/testdata/envvar_comments_expected.toml b/config/testdata/envvar_comments_expected.toml new file mode 100644 index 0000000000000..3e38656fd9217 --- /dev/null +++ b/config/testdata/envvar_comments_expected.toml @@ -0,0 +1,99 @@ + + + + + + + + + + + + + + + +[global_tags] + +[agent] +interval = "10s" +round_interval = true +metric_batch_size = 1000 +metric_buffer_limit = 10000 +collection_jitter = "0s" +flush_interval = '10s' +flush_jitter = "0s" +precision = "" +hostname = '' +omit_hostname = false + +[[outputs.influxdb]] + setting1 = '#' + setting2 = '''#''' + setting3 = "#" + setting4 = """#""" + wicked1 = "\"" + wicked2 = """\"""" + +[[inputs.cpu]] + percpu = true + + + + +[[a.plugin]] + mylist = [ + "value 1", + "value 2", + "value 3", "value 4", + 'value5', """tagwith#value""", + ] + +[[some.stuff]] + a = 'not a #comment' + b = '''not a #comment''' + c = "not a #comment" + d = """not a #comment""" + e = '''not a #comment containing "quotes"''' + f = '''not a #comment containing 'quotes'?''' + g = """not a #comment containing "quotes"?""" + + +[[inputs.myplugin]] +value = '''This isn't a #comment.''' + +[[processors.starlark]] + script = """ +# Drop fields if they contain a string. +# +# Example Input: +# measurement,host=hostname a=1,b="somestring" 1597255410000000000 +# +# Example Output: +# measurement,host=hostname a=1 1597255410000000000 + +def apply(metric): + for k, v in metric.fields.items(): + if type(v) == "string": + metric.fields.pop(k) + + return metric +""" + +[[processors.starlark]] + script = ''' +# Drop fields if they contain a string. +# +# Example Input: +# measurement,host=hostname a=1,b="somestring" 1597255410000000000 +# +# Example Output: +# measurement,host=hostname a=1 1597255410000000000 + +def apply(metric): + for k, v in metric.fields.items(): + if type(v) == "string": + metric.fields.pop(k) + + return metric +''' From d250b367946c97f923dbc10854130a59521acacb Mon Sep 17 00:00:00 2001 From: Sven Rebhan Date: Thu, 2 Nov 2023 16:57:16 +0100 Subject: [PATCH 2/3] Move environment variable related functions to own file --- config/config.go | 99 ------------------------------------------ config/envvar.go | 109 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 99 deletions(-) create mode 100644 config/envvar.go diff --git a/config/config.go b/config/config.go index 06be5ada09f96..97f8afc974497 100644 --- a/config/config.go +++ b/config/config.go @@ -20,8 +20,6 @@ import ( "sync" "time" - "github.com/compose-spec/compose-go/template" - "github.com/compose-spec/compose-go/utils" "github.com/coreos/go-semver/semver" "github.com/influxdata/toml" "github.com/influxdata/toml/ast" @@ -790,103 +788,6 @@ func parseConfig(contents []byte) (*ast.Table, error) { return toml.Parse(outputBytes) } -func removeComments(contents []byte) ([]byte, error) { - tomlReader := bytes.NewReader(contents) - - // Initialize variables for tracking state - var inQuote, inComment, escaped bool - var quoteChar byte - - // Initialize buffer for modified TOML data - var output bytes.Buffer - - buf := make([]byte, 1) - // Iterate over each character in the file - for { - _, err := tomlReader.Read(buf) - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return nil, err - } - char := buf[0] - - // Toggle the escaped state at backslash to we have true every odd occurrence. - if char == '\\' { - escaped = !escaped - } - - if inComment { - // If we're currently in a comment, check if this character ends the comment - if char == '\n' { - // End of line, comment is finished - inComment = false - _, _ = output.WriteRune('\n') - } - } else if inQuote { - // If we're currently in a quote, check if this character ends the quote - if char == quoteChar && !escaped { - // End of quote, we're no longer in a quote - inQuote = false - } - output.WriteByte(char) - } else { - // Not in a comment or a quote - if (char == '"' || char == '\'') && !escaped { - // Start of quote - inQuote = true - quoteChar = char - output.WriteByte(char) - } else if char == '#' && !escaped { - // Start of comment - inComment = true - } else { - // Not a comment or a quote, just output the character - output.WriteByte(char) - } - } - - // Reset escaping if any other character occurred - if char != '\\' { - escaped = false - } - } - return output.Bytes(), nil -} - -func substituteEnvironment(contents []byte, oldReplacementBehavior bool) ([]byte, error) { - options := []template.Option{ - template.WithReplacementFunction(func(s string, m template.Mapping, cfg *template.Config) (string, error) { - result, applied, err := template.DefaultReplacementAppliedFunc(s, m, cfg) - if err == nil && !applied { - // Keep undeclared environment-variable patterns to reproduce - // pre-v1.27 behavior - return s, nil - } - if err != nil && strings.HasPrefix(err.Error(), "Invalid template:") { - // Keep invalid template patterns to ignore regexp substitutions - // like ${1} - return s, nil - } - return result, err - }), - template.WithoutLogging, - } - if oldReplacementBehavior { - options = append(options, template.WithPattern(oldVarRe)) - } - - envMap := utils.GetAsEqualsMap(os.Environ()) - retVal, err := template.SubstituteWithOptions(string(contents), func(k string) (string, bool) { - if v, ok := envMap[k]; ok { - return v, ok - } - return "", false - }, options...) - return []byte(retVal), err -} - func (c *Config) addAggregator(name string, table *ast.Table) error { creator, ok := aggregators.Aggregators[name] if !ok { diff --git a/config/envvar.go b/config/envvar.go new file mode 100644 index 0000000000000..d65d8171b51d0 --- /dev/null +++ b/config/envvar.go @@ -0,0 +1,109 @@ +package config + +import ( + "bytes" + "errors" + "io" + "os" + "strings" + + "github.com/compose-spec/compose-go/template" + "github.com/compose-spec/compose-go/utils" +) + +func removeComments(contents []byte) ([]byte, error) { + tomlReader := bytes.NewReader(contents) + + // Initialize variables for tracking state + var inQuote, inComment, escaped bool + var quoteChar byte + + // Initialize buffer for modified TOML data + var output bytes.Buffer + + buf := make([]byte, 1) + // Iterate over each character in the file + for { + _, err := tomlReader.Read(buf) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + char := buf[0] + + // Toggle the escaped state at backslash to we have true every odd occurrence. + if char == '\\' { + escaped = !escaped + } + + if inComment { + // If we're currently in a comment, check if this character ends the comment + if char == '\n' { + // End of line, comment is finished + inComment = false + _, _ = output.WriteRune('\n') + } + } else if inQuote { + // If we're currently in a quote, check if this character ends the quote + if char == quoteChar && !escaped { + // End of quote, we're no longer in a quote + inQuote = false + } + output.WriteByte(char) + } else { + // Not in a comment or a quote + if (char == '"' || char == '\'') && !escaped { + // Start of quote + inQuote = true + quoteChar = char + output.WriteByte(char) + } else if char == '#' && !escaped { + // Start of comment + inComment = true + } else { + // Not a comment or a quote, just output the character + output.WriteByte(char) + } + } + + // Reset escaping if any other character occurred + if char != '\\' { + escaped = false + } + } + return output.Bytes(), nil +} + +func substituteEnvironment(contents []byte, oldReplacementBehavior bool) ([]byte, error) { + options := []template.Option{ + template.WithReplacementFunction(func(s string, m template.Mapping, cfg *template.Config) (string, error) { + result, applied, err := template.DefaultReplacementAppliedFunc(s, m, cfg) + if err == nil && !applied { + // Keep undeclared environment-variable patterns to reproduce + // pre-v1.27 behavior + return s, nil + } + if err != nil && strings.HasPrefix(err.Error(), "Invalid template:") { + // Keep invalid template patterns to ignore regexp substitutions + // like ${1} + return s, nil + } + return result, err + }), + template.WithoutLogging, + } + if oldReplacementBehavior { + options = append(options, template.WithPattern(oldVarRe)) + } + + envMap := utils.GetAsEqualsMap(os.Environ()) + retVal, err := template.SubstituteWithOptions(string(contents), func(k string) (string, bool) { + if v, ok := envMap[k]; ok { + return v, ok + } + return "", false + }, options...) + return []byte(retVal), err +} From 23c57c249a85c2fbfacb0a44ef2628cc81331d13 Mon Sep 17 00:00:00 2001 From: Sven Rebhan Date: Thu, 2 Nov 2023 16:58:35 +0100 Subject: [PATCH 3/3] Fix comment removal with an actual state-machine --- config/envvar.go | 234 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 189 insertions(+), 45 deletions(-) diff --git a/config/envvar.go b/config/envvar.go index d65d8171b51d0..09f85a4ae04a7 100644 --- a/config/envvar.go +++ b/config/envvar.go @@ -11,69 +11,213 @@ import ( "github.com/compose-spec/compose-go/utils" ) -func removeComments(contents []byte) ([]byte, error) { - tomlReader := bytes.NewReader(contents) - - // Initialize variables for tracking state - var inQuote, inComment, escaped bool - var quoteChar byte +type trimmer struct { + input *bytes.Reader + output bytes.Buffer +} - // Initialize buffer for modified TOML data - var output bytes.Buffer +func removeComments(buf []byte) ([]byte, error) { + t := &trimmer{ + input: bytes.NewReader(buf), + output: bytes.Buffer{}, + } + err := t.process() + return t.output.Bytes(), err +} - buf := make([]byte, 1) - // Iterate over each character in the file +func (t *trimmer) process() error { for { - _, err := tomlReader.Read(buf) + // Read the next byte until EOF + c, err := t.input.ReadByte() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + + // Switch states if we need to + switch c { + case '\\': + _ = t.input.UnreadByte() + err = t.escape() + case '\'': + _ = t.input.UnreadByte() + if t.hasNQuotes(c, 3) { + err = t.tripleSingleQuote() + } else { + err = t.singleQuote() + } + case '"': + _ = t.input.UnreadByte() + if t.hasNQuotes(c, 3) { + err = t.tripleDoubleQuote() + } else { + err = t.doubleQuote() + } + case '#': + err = t.comment() + default: + if err := t.output.WriteByte(c); err != nil { + return err + } + continue + } if err != nil { if errors.Is(err, io.EOF) { break } - return nil, err + return err + } + } + return nil +} + +func (t *trimmer) hasNQuotes(ref byte, limit int64) bool { + var count int64 + // Look ahead check if the next characters are what we expect + for count = 0; count < limit; count++ { + c, err := t.input.ReadByte() + if err != nil || c != ref { + break + } + } + // We also need to unread the non-matching character + offset := -count + if count < limit { + offset-- + } + // Unread the matched characters + _, _ = t.input.Seek(offset, io.SeekCurrent) + return count >= limit +} + +func (t *trimmer) readWriteByte() (byte, error) { + c, err := t.input.ReadByte() + if err != nil { + return 0, err + } + return c, t.output.WriteByte(c) +} + +func (t *trimmer) escape() error { + // Consumer the known starting backslash and quote + _, _ = t.readWriteByte() + + // Read the next character which is the escaped one and exit + _, err := t.readWriteByte() + return err +} + +func (t *trimmer) singleQuote() error { + // Consumer the known starting quote + _, _ = t.readWriteByte() + + // Read bytes until EOF, line end or another single quote + for { + if c, err := t.readWriteByte(); err != nil || c == '\'' || c == '\n' { + return err + } + } +} + +func (t *trimmer) tripleSingleQuote() error { + for i := 0; i < 3; i++ { + // Consumer the known starting quotes + _, _ = t.readWriteByte() + } + + // Read bytes until EOF or another set of triple single quotes + for { + c, err := t.readWriteByte() + if err != nil { + return err } - char := buf[0] - // Toggle the escaped state at backslash to we have true every odd occurrence. - if char == '\\' { - escaped = !escaped + if c == '\'' && t.hasNQuotes('\'', 2) { + // Consumer the two additional ending quotes + _, _ = t.readWriteByte() + _, _ = t.readWriteByte() + return nil } + } +} + +func (t *trimmer) doubleQuote() error { + // Consumer the known starting quote + _, _ = t.readWriteByte() - if inComment { - // If we're currently in a comment, check if this character ends the comment - if char == '\n' { - // End of line, comment is finished - inComment = false - _, _ = output.WriteRune('\n') + // Read bytes until EOF, line end or another double quote + for { + c, err := t.input.ReadByte() + if err != nil { + return err + } + switch c { + case '\\': + // Found escaped character + _ = t.input.UnreadByte() + if err := t.escape(); err != nil { + return err } - } else if inQuote { - // If we're currently in a quote, check if this character ends the quote - if char == quoteChar && !escaped { - // End of quote, we're no longer in a quote - inQuote = false + continue + case '"', '\n': + // Found terminator + return t.output.WriteByte(c) + } + if err := t.output.WriteByte(c); err != nil { + return err + } + } +} + +func (t *trimmer) tripleDoubleQuote() error { + for i := 0; i < 3; i++ { + // Consumer the known starting quotes + _, _ = t.readWriteByte() + } + + // Read bytes until EOF or another set of triple double quotes + for { + c, err := t.input.ReadByte() + if err != nil { + return err + } + switch c { + case '\\': + // Found escaped character + _ = t.input.UnreadByte() + if err := t.escape(); err != nil { + return err } - output.WriteByte(char) - } else { - // Not in a comment or a quote - if (char == '"' || char == '\'') && !escaped { - // Start of quote - inQuote = true - quoteChar = char - output.WriteByte(char) - } else if char == '#' && !escaped { - // Start of comment - inComment = true - } else { - // Not a comment or a quote, just output the character - output.WriteByte(char) + continue + case '"': + _ = t.output.WriteByte(c) + if t.hasNQuotes('"', 2) { + // Consumer the two additional ending quotes + _, _ = t.readWriteByte() + _, _ = t.readWriteByte() + return nil } + continue } + if err := t.output.WriteByte(c); err != nil { + return err + } + } +} - // Reset escaping if any other character occurred - if char != '\\' { - escaped = false +func (t *trimmer) comment() error { + // Read bytes until EOF or a line break + for { + c, err := t.input.ReadByte() + if err != nil { + return err + } + if c == '\n' { + return t.output.WriteByte(c) } } - return output.Bytes(), nil } func substituteEnvironment(contents []byte, oldReplacementBehavior bool) ([]byte, error) {