Skip to content
This repository has been archived by the owner on Mar 12, 2024. It is now read-only.

Commit

Permalink
fix: parser handling comments and empty lines (pressly#446)
Browse files Browse the repository at this point in the history
  • Loading branch information
mfridman authored Jan 26, 2023
1 parent 2032773 commit bf26560
Show file tree
Hide file tree
Showing 16 changed files with 179 additions and 86 deletions.
81 changes: 46 additions & 35 deletions internal/sqlparser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"fmt"
"io"
"log"
"regexp"
"strings"
"sync"
)
Expand Down Expand Up @@ -37,11 +36,10 @@ func (s *stateMachine) Set(new parserState) {

const scanBufSize = 4 * 1024 * 1024

var matchEmptyLines = regexp.MustCompile(`^\s*$`)

var bufferPool = sync.Pool{
New: func() interface{} {
return make([]byte, scanBufSize)
buf := make([]byte, scanBufSize)
return &buf
},
}

Expand All @@ -56,29 +54,32 @@ var bufferPool = sync.Pool{
// 'StatementBegin' and 'StatementEnd' to allow the script to
// tell us to ignore semicolons.
func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool, err error) {
var buf bytes.Buffer
scanBuf := bufferPool.Get().([]byte)
defer bufferPool.Put(scanBuf)
scanBufPtr := bufferPool.Get().(*[]byte)
scanBuf := *scanBufPtr
defer bufferPool.Put(scanBufPtr)

scanner := bufio.NewScanner(r)
scanner.Buffer(scanBuf, scanBufSize)

stateMachine := stateMachine(start)
useTx = true

firstLineFound := false
var buf bytes.Buffer
for scanner.Scan() {
line := scanner.Text()
if verbose {
log.Println(line)
}

if stateMachine.Get() == start && strings.TrimSpace(line) == "" {
continue
}
// TODO(mf): validate annotations to avoid common user errors:
// https://github.com/pressly/goose/issues/163#issuecomment-501736725
if strings.HasPrefix(line, "--") {
cmd := strings.TrimSpace(strings.TrimPrefix(line, "--"))

switch cmd {
case "+goose Up":
firstLineFound = true
switch stateMachine.Get() {
case start:
stateMachine.Set(gooseUp)
Expand All @@ -88,7 +89,6 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
continue

case "+goose Down":
firstLineFound = true
switch stateMachine.Get() {
case gooseUp, gooseStatementEndUp:
stateMachine.Set(gooseDown)
Expand All @@ -98,7 +98,6 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
continue

case "+goose StatementBegin":
firstLineFound = true
switch stateMachine.Get() {
case gooseUp, gooseStatementEndUp:
stateMachine.Set(gooseStatementBeginUp)
Expand All @@ -110,7 +109,6 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
continue

case "+goose StatementEnd":
firstLineFound = true
switch stateMachine.Get() {
case gooseStatementBeginUp:
stateMachine.Set(gooseStatementEndUp)
Expand All @@ -123,25 +121,27 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
case "+goose NO TRANSACTION":
useTx = false
continue

default:
// Ignore comments.
}
}
// Once we've started parsing a statement the buffer is no longer empty,
// we keep all comments up until the end of the statement (the buffer will be reset).
// All other comments in the file are ignored.
if buf.Len() == 0 {
// This check ensures leading comments and empty lines prior to a statement are ignored.
if strings.HasPrefix(strings.TrimSpace(line), "--") || line == "" {
verboseInfo("StateMachine: ignore comment")
continue
}
}

// Ignore empty lines until first line is found.
if !firstLineFound && matchEmptyLines.MatchString(line) {
verboseInfo("StateMachine: ignore empty line")
continue
}

// Write SQL line to a buffer.
if _, err := buf.WriteString(line + "\n"); err != nil {
return nil, false, fmt.Errorf("failed to write to buf: %w", err)
switch stateMachine.Get() {
case gooseStatementEndDown, gooseStatementEndUp:
// Do not include the "+goose StatementEnd" annotation in the final statement.
default:
// Write SQL line to a buffer.
if _, err := buf.WriteString(line + "\n"); err != nil {
return nil, false, fmt.Errorf("failed to write to buf: %w", err)
}
}

// Read SQL body one by line, if we're in the right direction.
//
// 1) basic query with semicolon; 2) psql statement
Expand All @@ -161,29 +161,29 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
continue
}
default:
return nil, false, fmt.Errorf("failed to parse migration: unexpected state %q on line %q, see https://github.com/pressly/goose#sql-migrations", stateMachine, line)
return nil, false, fmt.Errorf("failed to parse migration: unexpected state %d on line %q, see https://github.com/pressly/goose#sql-migrations", stateMachine, line)
}

switch stateMachine.Get() {
case gooseUp:
if endsWithSemicolon(line) {
stmts = append(stmts, buf.String())
stmts = append(stmts, cleanupStatement(buf.String()))
buf.Reset()
verboseInfo("StateMachine: store simple Up query")
}
case gooseDown:
if endsWithSemicolon(line) {
stmts = append(stmts, buf.String())
stmts = append(stmts, cleanupStatement(buf.String()))
buf.Reset()
verboseInfo("StateMachine: store simple Down query")
}
case gooseStatementEndUp:
stmts = append(stmts, buf.String())
stmts = append(stmts, cleanupStatement(buf.String()))
buf.Reset()
verboseInfo("StateMachine: store Up statement")
stateMachine.Set(gooseUp)
case gooseStatementEndDown:
stmts = append(stmts, buf.String())
stmts = append(stmts, cleanupStatement(buf.String()))
buf.Reset()
verboseInfo("StateMachine: store Down statement")
stateMachine.Set(gooseDown)
Expand All @@ -202,17 +202,28 @@ func ParseSQLMigration(r io.Reader, direction bool) (stmts []string, useTx bool,
}

if bufferRemaining := strings.TrimSpace(buf.String()); len(bufferRemaining) > 0 {
return nil, false, fmt.Errorf("failed to parse migration: state %q, direction: %v: unexpected unfinished SQL query: %q: missing semicolon?", stateMachine, direction, bufferRemaining)
return nil, false, fmt.Errorf("failed to parse migration: state %d, direction: %v: unexpected unfinished SQL query: %q: missing semicolon?", stateMachine, direction, bufferRemaining)
}

return stmts, useTx, nil
}

// cleanupStatement attempts to find the last semicolon and trims
// the remaining chars from the input string. This is useful for cleaning
// up a statement containing trailing comments or empty lines.
func cleanupStatement(input string) string {
if n := strings.LastIndex(input, ";"); n > 0 {
return input[:n+1]
}
return input
}

// Checks the line to see if the line has a statement-ending semicolon
// or if the line contains a double-dash comment.
func endsWithSemicolon(line string) bool {
scanBuf := bufferPool.Get().([]byte)
defer bufferPool.Put(scanBuf)
scanBufPtr := bufferPool.Get().(*[]byte)
scanBuf := *scanBufPtr
defer bufferPool.Put(scanBufPtr)

prev := ""
scanner := bufio.NewScanner(strings.NewReader(line))
Expand Down
57 changes: 11 additions & 46 deletions internal/sqlparser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package sqlparser

import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -79,25 +78,6 @@ func TestSplitStatements(t *testing.T) {
}
}

func TestKeepEmptyLines(t *testing.T) {
stmts, _, err := ParseSQLMigration(strings.NewReader(emptyLineSQL), true)
if err != nil {
t.Errorf("Failed to parse SQL migration. %v", err)
}
expected := `INSERT INTO post (id, title, body)
VALUES ('id_01', 'my_title', '
this is an insert statement including empty lines.
empty (blank) lines can be meaningful.
leave the lines to keep the text syntax.
');
`
if stmts[0] != expected {
t.Errorf("incorrect stmts. got %v, want %v", stmts, expected)
}
}

func TestUseTransactions(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -162,20 +142,6 @@ SELECT 4; -- 4th stmt
DROP TABLE post; -- 1st stmt
`

var emptyLineSQL = `-- +goose Up
INSERT INTO post (id, title, body)
VALUES ('id_01', 'my_title', '
this is an insert statement including empty lines.
empty (blank) lines can be meaningful.
leave the lines to keep the text syntax.
');
-- +goose Down
TRUNCATE TABLE post;
`

var functxt = `-- +goose Up
CREATE TABLE IF NOT EXISTS histories (
id BIGSERIAL PRIMARY KEY,
Expand Down Expand Up @@ -401,9 +367,10 @@ func TestValidUp(t *testing.T) {
{Name: "test01", StatementsCount: 3},
{Name: "test02", StatementsCount: 1},
{Name: "test03", StatementsCount: 1},
{Name: "test04", StatementsCount: 2},
{Name: "test04", StatementsCount: 3},
{Name: "test05", StatementsCount: 2},
{Name: "test06", StatementsCount: 3},
{Name: "test06", StatementsCount: 5},
{Name: "test07", StatementsCount: 1},
}
for _, tc := range tests {
path := filepath.Join("testdata", "valid-up", tc.Name)
Expand All @@ -428,24 +395,22 @@ func testValidUp(t *testing.T, dir string, count int) {
func compareStatements(t *testing.T, dir string, statements []string) {
t.Helper()

files, err := os.ReadDir(dir)
files, err := filepath.Glob(filepath.Join(dir, "*.golden.sql"))
check.NoError(t, err)
if len(statements) != len(files) {
t.Fatalf("mismatch between parsed statements (%d) and golden files (%d), did you check in NN.golden.sql file in %q?", len(statements), len(files), dir)
}
for _, goldenFile := range files {
if goldenFile.Name() == "input.sql" {
continue
}
if !strings.HasSuffix(goldenFile.Name(), ".golden.sql") {
t.Fatalf("expecting golden file with format <name>.golden.sql: got: %q. Try running `make clean` to remove previous failed files?", goldenFile.Name())
}
before, _, ok := cut(goldenFile.Name(), ".")
goldenFile = filepath.Base(goldenFile)
before, _, ok := cut(goldenFile, ".")
if !ok {
t.Fatal(`failed to cut on file delimiter ".", must be of the format NN.golden.sql`)
}
index, err := strconv.Atoi(before)
check.NoError(t, err)
index--

goldenFilePath := filepath.Join(dir, goldenFile.Name())
goldenFilePath := filepath.Join(dir, goldenFile)
by, err := os.ReadFile(goldenFilePath)
check.NoError(t, err)

Expand All @@ -460,7 +425,7 @@ func compareStatements(t *testing.T, dir string, statements []string) {
filepath.Join("internal", "sqlparser", goldenFilePath+".FAIL"),
filepath.Join("internal", "sqlparser", goldenFilePath),
)
err := ioutil.WriteFile(goldenFilePath+".FAIL", []byte(got+"\n"), 0644)
err := os.WriteFile(goldenFilePath+".FAIL", []byte(got+"\n"), 0644)
check.NoError(t, err)
}
}
Expand Down
1 change: 0 additions & 1 deletion internal/sqlparser/testdata/valid-up/test01/02.golden.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ CREATE FUNCTION emp_stamp() RETURNS trigger AS $emp_stamp$
RETURN NEW;
END;
$emp_stamp$ LANGUAGE plpgsql;
-- +goose StatementEnd
1 change: 0 additions & 1 deletion internal/sqlparser/testdata/valid-up/test02/01.golden.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,3 @@ $emp_audit$ LANGUAGE plpgsql;
CREATE TRIGGER emp_audit
AFTER INSERT OR UPDATE OR DELETE ON emp
FOR EACH ROW EXECUTE FUNCTION process_emp_audit();
-- +goose StatementEnd
5 changes: 4 additions & 1 deletion internal/sqlparser/testdata/valid-up/test03/01.golden.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ BEGIN
v_url ALIAS FOR $3;
BEGIN '';
--
-- Notice how we scan through the results of a query in a FOR loop
-- using the FOR <record> construct.
--
FOR referrer_keys IN SELECT * FROM cs_referrer_keys ORDER BY try_order LOOP
a_output := a_output || '' IF v_'' || referrer_keys.kind || '' LIKE ''''''''''
Expand All @@ -26,4 +30,3 @@ BEGIN
EXECUTE a_output;
END;
' LANGUAGE 'plpgsql';
-- +goose StatementEnd
3 changes: 2 additions & 1 deletion internal/sqlparser/testdata/valid-up/test03/input.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ BEGIN
EXECUTE a_output;
END;
' LANGUAGE 'plpgsql';
' LANGUAGE 'plpgsql'; -- This comment will NOT be preserved.
-- And neither will this one.
-- +goose StatementEnd

-- +goose Down
2 changes: 1 addition & 1 deletion internal/sqlparser/testdata/valid-up/test04/02.golden.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ M7Zifl2NyVVCPb0hELr2JJla1u/1CgiuqDpcjP2cCu2YxB/JGyCvcon+3tETUz3Ri9NGzHCZ
fkuWRZjkUvy7nfPLjzM+t6SEvY4lbn3ihLPumZjwgvuCY3vDZY8V1/NMoP8MKATGR+S7D7gv
I6KD9jkiSsTJMiotb/dRkXE3bG0nmjchhhLzMG551G8IZEpWBHDqEisCIl8yCd9YZV69BZTu
L48zPl/CFvA+KJJ6LklxfwWeVDQ+ve2OIW0B1uLhR/MsoYbDQztbgIayg6ieMO/KlQIDAQAB
-----END RSA PUBLIC KEY-----
');
-- +goose StatementEnd
22 changes: 22 additions & 0 deletions internal/sqlparser/testdata/valid-up/test04/03.golden.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
INSERT INTO ssh_keys (id, "publicKey") VALUES (2, '-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABsQAAAAdzc2gtZH
NzAAAAgQDKYlGRv+Ul4WYKN3F83zchQC24ZV17EJCKwakASf//u2p1+K4iuTQbPgcUWpET
w4HkCRlOVwYkM4ZL2QncUDyEX3o0UDEgrnhWhLJJR2yCMSsqTNffME3X7YdP2LcE0OM9ZP
9vb5+TwLr6c4edwlu3QYc4VStWSstdR9DD7vun9QAAABUA+DRjW9u+5IEPHyx+FhEoKsRm
8aEAAACAU2auArSDuTVbJBE/oP6x+4vrud2qFtALsuFaLPfqN5gVBofRJlIOpQobyi022R
6SPtX/DEbWkVuHfiyxkjb0Gb0obBzGM+RNqjOF6OE9sh7OuBfLaWW44OZasg1VXzkRcoWv
7jClfKYi2Q/LxHGhZoqy1uYlHPYP5CmCpiELrUMAAACAfp0KpTVyYQjO2nLPuBhnsepfxQ
kT+FDqDBp1rZfB4uKx4q466Aq0jeev1OeQEYZpj3+q4b2XX54zXDwvJLuiD9WSmC7jvT0+
EUmF55PHW4inloG9pMUzeQnx3k8WDcRJbcAMalpoCCsb0jEPIiyBGBtQu0gOoLL+N+G2Cl
U+/FEAAAHgOKSlwjikpcIAAAAHc3NoLWRzcwAAAIEAymJRkb/lJeFmCjdxfN83IUAtuGVd
exCQisGpAEn//7tqdfiuIrk0Gz4HFFqRE8OB5AkZTlcGJDOGS9kJ3FA8hF96NFAxIK54Vo
SySUdsgjErKkzX3zBN1+2HT9i3BNDjPWT/b2+fk8C6+nOHncJbt0GHOFUrVkrLXUfQw+77
p/UAAAAVAPg0Y1vbvuSBDx8sfhYRKCrEZvGhAAAAgFNmrgK0g7k1WyQRP6D+sfuL67ndqh
bQC7LhWiz36jeYFQaH0SZSDqUKG8otNtkekj7V/wxG1pFbh34ssZI29Bm9KGwcxjPkTaoz
hejhPbIezrgXy2lluODmWrINVV85EXKFr+4wpXymItkPy8RxoWaKstbmJRz2D+QpgqYhC6
1DAAAAgH6dCqU1cmEIztpyz7gYZ7HqX8UJE/hQ6gwada2XweLiseKuOugKtI3nr9TnkBGG
aY9/quG9l1+eM1w8LyS7og/Vkpgu4709PhFJheeTx1uIp5aBvaTFM3kJ8d5PFg3ESW3ADG
paaAgrG9IxDyIsgRgbULtIDqCy/jfhtgpVPvxRAAAAFQCPXzpVtY5yJTN1zBo9pTGeg+f3
EgAAAAZub25hbWUBAgME
-----END OPENSSH PRIVATE KEY-----
');
23 changes: 23 additions & 0 deletions internal/sqlparser/testdata/valid-up/test04/input.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,27 @@ L48zPl/CFvA+KJJ6LklxfwWeVDQ+ve2OIW0B1uLhR/MsoYbDQztbgIayg6ieMO/KlQIDAQAB
');
-- +goose StatementEnd

INSERT INTO ssh_keys (id, "publicKey") VALUES (2, '-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABsQAAAAdzc2gtZH
NzAAAAgQDKYlGRv+Ul4WYKN3F83zchQC24ZV17EJCKwakASf//u2p1+K4iuTQbPgcUWpET
w4HkCRlOVwYkM4ZL2QncUDyEX3o0UDEgrnhWhLJJR2yCMSsqTNffME3X7YdP2LcE0OM9ZP
9vb5+TwLr6c4edwlu3QYc4VStWSstdR9DD7vun9QAAABUA+DRjW9u+5IEPHyx+FhEoKsRm
8aEAAACAU2auArSDuTVbJBE/oP6x+4vrud2qFtALsuFaLPfqN5gVBofRJlIOpQobyi022R
6SPtX/DEbWkVuHfiyxkjb0Gb0obBzGM+RNqjOF6OE9sh7OuBfLaWW44OZasg1VXzkRcoWv
7jClfKYi2Q/LxHGhZoqy1uYlHPYP5CmCpiELrUMAAACAfp0KpTVyYQjO2nLPuBhnsepfxQ
kT+FDqDBp1rZfB4uKx4q466Aq0jeev1OeQEYZpj3+q4b2XX54zXDwvJLuiD9WSmC7jvT0+
EUmF55PHW4inloG9pMUzeQnx3k8WDcRJbcAMalpoCCsb0jEPIiyBGBtQu0gOoLL+N+G2Cl
U+/FEAAAHgOKSlwjikpcIAAAAHc3NoLWRzcwAAAIEAymJRkb/lJeFmCjdxfN83IUAtuGVd
exCQisGpAEn//7tqdfiuIrk0Gz4HFFqRE8OB5AkZTlcGJDOGS9kJ3FA8hF96NFAxIK54Vo
SySUdsgjErKkzX3zBN1+2HT9i3BNDjPWT/b2+fk8C6+nOHncJbt0GHOFUrVkrLXUfQw+77
p/UAAAAVAPg0Y1vbvuSBDx8sfhYRKCrEZvGhAAAAgFNmrgK0g7k1WyQRP6D+sfuL67ndqh
bQC7LhWiz36jeYFQaH0SZSDqUKG8otNtkekj7V/wxG1pFbh34ssZI29Bm9KGwcxjPkTaoz
hejhPbIezrgXy2lluODmWrINVV85EXKFr+4wpXymItkPy8RxoWaKstbmJRz2D+QpgqYhC6
1DAAAAgH6dCqU1cmEIztpyz7gYZ7HqX8UJE/hQ6gwada2XweLiseKuOugKtI3nr9TnkBGG
aY9/quG9l1+eM1w8LyS7og/Vkpgu4709PhFJheeTx1uIp5aBvaTFM3kJ8d5PFg3ESW3ADG
paaAgrG9IxDyIsgRgbULtIDqCy/jfhtgpVPvxRAAAAFQCPXzpVtY5yJTN1zBo9pTGeg+f3
EgAAAAZub25hbWUBAgME
-----END OPENSSH PRIVATE KEY-----
');

-- +goose Down
Loading

0 comments on commit bf26560

Please sign in to comment.