-
-
Notifications
You must be signed in to change notification settings - Fork 380
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
452 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
package command | ||
|
||
import ( | ||
"bytes" | ||
"errors" | ||
"fmt" | ||
"strings" | ||
"unicode/utf8" | ||
) | ||
|
||
var ( | ||
ErrCommandEmpty = errors.New("command is empty") | ||
ErrSingleQuoteUnterminated = errors.New("unterminated single-quoted string") | ||
ErrDoubleQuoteUnterminated = errors.New("unterminated double-quoted string") | ||
ErrEscapeUnterminated = errors.New("unterminated backslash-escape") | ||
) | ||
|
||
// Split splits a command string into a slice of arguments. | ||
// This is especially important for commands such as: | ||
// /bin/sh -c "echo hello" | ||
// which should be split into: ["/bin/sh", "-c", "echo hello"] | ||
// It supports backslash-escapes, single-quotes and double-quotes. | ||
// It does not support: | ||
// - the $" quoting style. | ||
// - expansion (brace, shell or pathname). | ||
func Split(command string) (words []string, err error) { | ||
if command == "" { | ||
return nil, fmt.Errorf("%w", ErrCommandEmpty) | ||
} | ||
|
||
const bufferSize = 1024 | ||
buffer := bytes.NewBuffer(make([]byte, bufferSize)) | ||
|
||
startIndex := 0 | ||
|
||
for startIndex < len(command) { | ||
// skip any split characters at the start | ||
character, runeSize := utf8.DecodeRuneInString(command[startIndex:]) | ||
switch { | ||
case strings.ContainsRune(" \n\t", character): | ||
startIndex += runeSize | ||
case character == '\\': | ||
// Look ahead to eventually skip an escaped newline | ||
if command[startIndex+runeSize:] == "" { | ||
return nil, fmt.Errorf("%w: %q", ErrEscapeUnterminated, command) | ||
} | ||
character, runeSize := utf8.DecodeRuneInString(command[startIndex+runeSize:]) | ||
if character == '\n' { | ||
startIndex += runeSize + runeSize // backslash and newline | ||
} | ||
default: | ||
var word string | ||
buffer.Reset() | ||
word, startIndex, err = splitWord(command, startIndex, buffer) | ||
if err != nil { | ||
return nil, fmt.Errorf("splitting word in %q: %w", command, err) | ||
} | ||
words = append(words, word) | ||
} | ||
} | ||
return words, nil | ||
} | ||
|
||
// WARNING: buffer must be cleared before calling this function. | ||
func splitWord(input string, startIndex int, buffer *bytes.Buffer) ( | ||
word string, newStartIndex int, err error) { | ||
cursor := startIndex | ||
for cursor < len(input) { | ||
character, runeLength := utf8.DecodeRuneInString(input[cursor:]) | ||
cursor += runeLength | ||
if character == '"' || | ||
character == '\'' || | ||
character == '\\' || | ||
character == ' ' || | ||
character == '\n' || | ||
character == '\t' { | ||
buffer.WriteString(input[startIndex : cursor-runeLength]) | ||
} | ||
|
||
switch { | ||
case strings.ContainsRune(" \n\t", character): // spacing character | ||
return buffer.String(), cursor, nil | ||
case character == '"': | ||
return handleDoubleQuoted(input, cursor, buffer) | ||
case character == '\'': | ||
return handleSingleQuoted(input, cursor, buffer) | ||
case character == '\\': | ||
return handleEscaped(input, cursor, buffer) | ||
} | ||
} | ||
|
||
buffer.WriteString(input[startIndex:]) | ||
return buffer.String(), len(input), nil | ||
} | ||
|
||
func handleDoubleQuoted(input string, startIndex int, buffer *bytes.Buffer) ( | ||
word string, newStartIndex int, err error) { | ||
cursor := startIndex | ||
for cursor < len(input) { | ||
nextCharacter, nextRuneLength := utf8.DecodeRuneInString(input[cursor:]) | ||
cursor += nextRuneLength | ||
switch nextCharacter { | ||
case '"': // end of the double quoted string | ||
buffer.WriteString(input[startIndex : cursor-nextRuneLength]) | ||
return splitWord(input, cursor, buffer) | ||
case '\\': // escaped character | ||
escapedCharacter, escapedRuneLength := utf8.DecodeRuneInString(input[cursor:]) | ||
cursor += escapedRuneLength | ||
if !strings.ContainsRune("$`\"\n\\", escapedCharacter) { | ||
break | ||
} | ||
buffer.WriteString(input[startIndex : cursor-nextRuneLength-escapedRuneLength]) | ||
if escapedCharacter != '\n' { | ||
// skip backslash entirely for the newline character | ||
buffer.WriteRune(escapedCharacter) | ||
} | ||
startIndex = cursor | ||
} | ||
} | ||
return "", 0, fmt.Errorf("%w", ErrDoubleQuoteUnterminated) | ||
} | ||
|
||
func handleSingleQuoted(input string, startIndex int, buffer *bytes.Buffer) ( | ||
word string, newStartIndex int, err error) { | ||
closingQuoteIndex := strings.IndexRune(input[startIndex:], '\'') | ||
if closingQuoteIndex == -1 { | ||
return "", 0, fmt.Errorf("%w", ErrSingleQuoteUnterminated) | ||
} | ||
buffer.WriteString(input[startIndex : startIndex+closingQuoteIndex]) | ||
const singleQuoteRuneLength = 1 | ||
startIndex += closingQuoteIndex + singleQuoteRuneLength | ||
return splitWord(input, startIndex, buffer) | ||
} | ||
|
||
func handleEscaped(input string, startIndex int, buffer *bytes.Buffer) ( | ||
word string, newStartIndex int, err error) { | ||
if input[startIndex:] == "" { | ||
return "", 0, fmt.Errorf("%w", ErrEscapeUnterminated) | ||
} | ||
character, runeLength := utf8.DecodeRuneInString(input[startIndex:]) | ||
if character != '\n' { // backslash-escaped newline is ignored | ||
buffer.WriteString(input[startIndex : startIndex+runeLength]) | ||
} | ||
startIndex += runeLength | ||
return splitWord(input, startIndex, buffer) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package command | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func Test_Split(t *testing.T) { | ||
t.Parallel() | ||
|
||
testCases := map[string]struct { | ||
command string | ||
words []string | ||
errWrapped error | ||
errMessage string | ||
}{ | ||
"empty": { | ||
command: "", | ||
errWrapped: ErrCommandEmpty, | ||
errMessage: "command is empty", | ||
}, | ||
"concrete_sh_command": { | ||
command: `/bin/sh -c "echo 123"`, | ||
words: []string{"/bin/sh", "-c", "echo 123"}, | ||
}, | ||
"single_word": { | ||
command: "word1", | ||
words: []string{"word1"}, | ||
}, | ||
"two_words_single_space": { | ||
command: "word1 word2", | ||
words: []string{"word1", "word2"}, | ||
}, | ||
"two_words_multiple_space": { | ||
command: "word1 word2", | ||
words: []string{"word1", "word2"}, | ||
}, | ||
"two_words_no_expansion": { | ||
command: "word1* word2?", | ||
words: []string{"word1*", "word2?"}, | ||
}, | ||
"escaped_single quote": { | ||
command: "ain\\'t good", | ||
words: []string{"ain't", "good"}, | ||
}, | ||
"escaped_single_quote_all_single_quoted": { | ||
command: "'ain'\\''t good'", | ||
words: []string{"ain't good"}, | ||
}, | ||
"empty_single_quoted": { | ||
command: "word1 '' word2", | ||
words: []string{"word1", "", "word2"}, | ||
}, | ||
"escaped_newline": { | ||
command: "word1\\\nword2", | ||
words: []string{"word1word2"}, | ||
}, | ||
"quoted_newline": { | ||
command: "text \"with\na\" quoted newline", | ||
words: []string{"text", "with\na", "quoted", "newline"}, | ||
}, | ||
"quoted_escaped_newline": { | ||
command: "\"word1\\d\\\\\\\" word2\\\nword3 word4\"", | ||
words: []string{"word1\\d\\\" word2word3 word4"}, | ||
}, | ||
"escaped_separated_newline": { | ||
command: "word1 \\\n word2", | ||
words: []string{"word1", "word2"}, | ||
}, | ||
"double_quotes_no_spacing": { | ||
command: "word1\"word2\"word3", | ||
words: []string{"word1word2word3"}, | ||
}, | ||
"unterminated_single_quote": { | ||
command: "'abc'\\''def", | ||
errWrapped: ErrSingleQuoteUnterminated, | ||
errMessage: `splitting word in "'abc'\\''def": unterminated single-quoted string`, | ||
}, | ||
"unterminated_double_quote": { | ||
command: "\"abc'def", | ||
errWrapped: ErrDoubleQuoteUnterminated, | ||
errMessage: `splitting word in "\"abc'def": unterminated double-quoted string`, | ||
}, | ||
"unterminated_escape": { | ||
command: "abc\\", | ||
errWrapped: ErrEscapeUnterminated, | ||
errMessage: `splitting word in "abc\\": unterminated backslash-escape`, | ||
}, | ||
"unterminated_escape_only": { | ||
command: " \\", | ||
errWrapped: ErrEscapeUnterminated, | ||
errMessage: `unterminated backslash-escape: " \\"`, | ||
}, | ||
} | ||
|
||
for name, testCase := range testCases { | ||
testCase := testCase | ||
t.Run(name, func(t *testing.T) { | ||
t.Parallel() | ||
|
||
words, err := Split(testCase.command) | ||
|
||
assert.Equal(t, testCase.words, words) | ||
assert.ErrorIs(t, err, testCase.errWrapped) | ||
if testCase.errWrapped != nil { | ||
assert.EqualError(t, err, testCase.errMessage) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.