From 61769d68f2df46c31d1fd1c6bbcca6c23676ef84 Mon Sep 17 00:00:00 2001
From: Nathaniel Schweinberg
Date: Wed, 23 Oct 2019 20:57:06 -0500
Subject: [PATCH] update to be able to read insert queries over multiple lines

---
 Makefile                    |  7 ++--
 anonymize-mysqldump.go      | 68 ++++++++++++++++++++++++++++++-------
 anonymize-mysqldump_test.go | 33 +++++++++++++++---
 3 files changed, 89 insertions(+), 19 deletions(-)

diff --git a/Makefile b/Makefile
index 4785561..52e6b84 100644
--- a/Makefile
+++ b/Makefile
@@ -17,11 +17,14 @@ lint:
 	golint ./... | grep -v vendor | tee /dev/stderr
 
 test:
-	go test -v ./...
+	LOG_LEVEL=debug go test -v ./...
 	go test -bench .
 
 test-watch:
-	fswatch -0 *.go | xargs -0 -L 1 -I % go test -v ./...
+	fswatch -0 *.go | xargs -0 -L 1 -I % sh -c 'LOG_LEVEL=debug go test -v ./...'
+
+run-test-watch:
+	fswatch -0 *.go | xargs -0 -L 1 -I % sh -c 'LOG_LEVEL=debug go test -v -run $(TEST)'
 
 clean:
 	rm -rf ${BUILDDIR}
diff --git a/anonymize-mysqldump.go b/anonymize-mysqldump.go
index e6dff60..349feea 100644
--- a/anonymize-mysqldump.go
+++ b/anonymize-mysqldump.go
@@ -69,20 +69,25 @@ func init() {
 func main() {
 	config := parseArgs()
 
+	lines := setupAndProcessInput(config, os.Stdin)
+
+	for line := range lines {
+		fmt.Print(<-line)
+	}
+}
+
+func setupAndProcessInput(config Config, input io.Reader) chan chan string {
 	var wg sync.WaitGroup
 	lines := make(chan chan string, 10)
 
 	wg.Add(1)
-	go processFile(&wg, lines, config)
+	go processInput(&wg, input, lines, config)
 
 	go func() {
 		wg.Wait()
 		close(lines)
 	}()
-
-	for line := range lines {
-		fmt.Print(<-line)
-	}
+	return lines
 }
 
 func parseArgs() Config {
@@ -113,22 +118,54 @@ func readConfigFile(filepath string) Config {
 	return decoded
 }
 
-func processFile(wg *sync.WaitGroup, lines chan chan string, config Config) {
+func processInput(wg *sync.WaitGroup, input io.Reader, lines chan chan string, config Config) {
 	defer wg.Done()
 
-	r := bufio.NewReaderSize(os.Stdin, 2*1024*1024)
+	r := bufio.NewReaderSize(input, 2*1024*1024)
+	var nextLine string
+	insertStarted := false
 
 	for {
 		line, err := r.ReadString('\n')
-		if err == io.EOF {
+		// First, let's check for any non-EOF errors and break
+		if err != nil && err != io.EOF {
+			logrus.Error(err.Error())
 			break
 		}
 
-		if err != nil {
-			logrus.Error(err.Error())
-			break
+		// If the line is empty, just skip it
+		if len(line) == 0 {
+			if err == io.EOF {
+				break
+			}
+			continue
+		}
+
+		// clean up whitespace
+		line = strings.TrimSpace(line)
+
+		// Test if this is an INSERT query. We'll use this to determine if we need
+		// to concatenate lines together if they're spread apart multiple lines
+		// instead of on a single line
+		maybeInsert := strings.ToUpper(line[:6]) == "INSERT"
+		if maybeInsert {
+			insertStarted = true
+		}
+
+		// Now that we've detected this is an INSERT query, let's append the lines
+		// together to form a single line in the event this spans multiple lines
+		if insertStarted {
+			nextLine += line
 		}
 
+		lastCharacter := line[len(line)-1:]
+		if lastCharacter != ";" && insertStarted {
+			continue
+		}
+
+		// Let's reset
+		insertStarted = false
+
 		wg.Add(1)
 		ch := make(chan string)
 		lines <- ch
@@ -137,7 +174,14 @@ func processFile(wg *sync.WaitGroup, lines chan chan string, config Config) {
 			defer wg.Done()
 			line = processLine(line, config)
 			ch <- line
-		}(line)
+		}(nextLine)
+
+		// We wait until the very end to check if EOF because we may have reached
+		// EOF and `line` still have a value
+		if err == io.EOF {
+			logrus.Debug("Reached EOF, finished processing.")
+			break
+		}
 	}
 }
 
diff --git a/anonymize-mysqldump_test.go b/anonymize-mysqldump_test.go
index 9a3f99a..bb310a8 100644
--- a/anonymize-mysqldump_test.go
+++ b/anonymize-mysqldump_test.go
@@ -1,20 +1,28 @@
 package main
 
 import (
+	"bytes"
 	"syreclabs.com/go/faker"
 	"testing"
 )
 
 var (
-	jsonConfig Config
-	commentsQuery = "INSERT INTO `wp_comments` VALUES (1,1,'A WordPress Commenter','wapuu@wordpress.example','https://wordpress.org/','','2019-06-12 00:59:19','2019-06-12 00:59:19','Hi, this is a comment.\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\nCommenter avatars come from Gravatar.',0,'1','','',0,0);\n"
+	jsonConfig Config
+	multilineQuery = `INSERT INTO wp_usermeta VALUES
+	(1,1,'first_name','John'),(2,1,'last_name','Doe'),
+	(3,1,'foobar','bazquz'),
+	(4,1,'nickname','Jim'),
+	(5,1,'description','Lorum ipsum.');
+`
+	multilineQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Nat'), (2, 1, 'last_name', 'Hermiston'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Treva'), (5, 1, 'description', 'Enim odio nihil.');\n"
+	commentsQuery = "INSERT INTO `wp_comments` VALUES (1,1,'A WordPress Commenter','wapuu@wordpress.example','https://wordpress.org/','','2019-06-12 00:59:19','2019-06-12 00:59:19','Hi, this is a comment.\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\nCommenter avatars come from Gravatar.',0,'1','','',0,0);\n"
 	// Don't forget to escape \ because it'll translate to a newline and not pass
 	// the comparison test
-	commentsQueryRecompiled = "insert into wp_comments values (1, 1, 'kamren.ohara', 'michele_barton@example.net', 'http://ebert.com/korey_keeling', '', '2019-06-12 00:59:19', '2019-06-12 00:59:19', 'Hi, this is a comment.\\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\\nCommenter avatars come from Gravatar.', 0, '1', '', '', 0, 0);\n"
+	commentsQueryRecompiled = "insert into wp_comments values (1, 1, 'sam_harvey', 'jillian@example.com', 'http://balistreriwiegand.name/sunny', '', '2019-06-12 00:59:19', '2019-06-12 00:59:19', 'Hi, this is a comment.\\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\\nCommenter avatars come from Gravatar.', 0, '1', '', '', 0, 0);\n"
 	usersQuery = "INSERT INTO `wp_users` VALUES (1,'username','user_pass','username','hosting@humanmade.com','','2019-06-12 00:59:19','',0,'username'),(2,'username','user_pass','username','hosting@humanmade.com','http://notreal.com/username','2019-06-12 00:59:19','',0,'username');\n"
-	usersQueryRecompiled = "insert into wp_users values (1, 'treva_cremin', 'NjaK5HeMAMuv', 'hailey', 'bernice.heaney@example.net', '', '2019-06-12 00:59:19', '', 0, 'Kylie Rice'), (2, 'eduardo', 'J3JRQ4XoIxXX6A', 'albert.okeefe', 'brooke.hayes@example.net', 'http://pfannerstill.net/brando', '2019-06-12 00:59:19', '', 0, 'Ardella Jenkins PhD');\n"
+	usersQueryRecompiled = "insert into wp_users values (1, 'fatima.fisher', 'abOSwkVS', 'lillian', 'grover@example.net', '', '2019-06-12 00:59:19', '', 0, 'Retta Bailey'), (2, 'juwan.kassulke', 'zgtEQA3nm4Wlro', 'evalyn', 'camilla.hilll@example.org', 'http://dickensmurphy.info/ophelia', '2019-06-12 00:59:19', '', 0, 'Rick Fahey III');\n"
 	userMetaQuery = "INSERT INTO `wp_usermeta` VALUES (1,1,'first_name','John'),(2,1,'last_name','Doe'),(3,1,'foobar','bazquz'),(4,1,'nickname','Jim'),(5,1,'description','Lorum ipsum.'),(6,2,'first_name','Janet'),(7,2,'last_name','Doe'),(8,2,'foobar','bazquz'),(9,2,'nickname','Jane'),(10,2,'description','Lorum ipsum.');\n"
-	userMetaQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Stephania'), (2, 1, 'last_name', 'Hamill'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Noah'), (5, 1, 'description', 'Dolorum nostrum alias.'), (6, 2, 'first_name', 'Ed'), (7, 2, 'last_name', 'Koelpin'), (8, 2, 'foobar', 'bazquz'), (9, 2, 'nickname', 'Watson'), (10, 2, 'description', 'Qui voluptatum est.');\n"
+	userMetaQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Ed'), (2, 1, 'last_name', 'Koelpin'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Watson'), (5, 1, 'description', 'Qui voluptatum est.'), (6, 2, 'first_name', 'Olen'), (7, 2, 'last_name', 'Williamson'), (8, 2, 'foobar', 'bazquz'), (9, 2, 'nickname', 'Kamren'), (10, 2, 'description', 'Eveniet repellat in.');\n"
 )
 
 func init() {
@@ -30,6 +38,21 @@ func BenchmarkProcessLine(b *testing.B) {
 	}
 }
 
+func TestProcessFile(t *testing.T) {
+	input := bytes.NewBufferString(multilineQuery)
+
+	lines := setupAndProcessInput(jsonConfig, input)
+
+	var result string
+	for line := range lines {
+		result = <-line
+	}
+
+	if result != multilineQueryRecompiled {
+		t.Error("\nExpected:\n", multilineQueryRecompiled, "\nActual:\n", result)
+	}
+}
+
 func TestApplyConfigToQuery(t *testing.T) {
 	var tests = []struct {