Skip to content

Commit

Permalink
update to be able to read insert queries over multiple lines
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanielks committed Oct 24, 2019
1 parent 81ffa9d commit 61769d6
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 19 deletions.
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@ lint:
golint ./... | grep -v vendor | tee /dev/stderr

test:
go test -v ./...
LOG_LEVEL=debug go test -v ./...
go test -bench .

test-watch:
fswatch -0 *.go | xargs -0 -L 1 -I % go test -v ./...
fswatch -0 *.go | xargs -0 -L 1 -I % sh -c 'LOG_LEVEL=debug go test -v ./...'

run-test-watch:
fswatch -0 *.go | xargs -0 -L 1 -I % sh -c 'LOG_LEVEL=debug go test -v -run $(TEST)'

clean:
rm -rf ${BUILDDIR}
Expand Down
68 changes: 56 additions & 12 deletions anonymize-mysqldump.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,25 @@ func init() {
func main() {
config := parseArgs()

lines := setupAndProcessInput(config, os.Stdin)

for line := range lines {
fmt.Print(<-line)
}
}

func setupAndProcessInput(config Config, input io.Reader) chan chan string {
var wg sync.WaitGroup
lines := make(chan chan string, 10)

wg.Add(1)
go processFile(&wg, lines, config)
go processInput(&wg, input, lines, config)

go func() {
wg.Wait()
close(lines)
}()

for line := range lines {
fmt.Print(<-line)
}
return lines
}

func parseArgs() Config {
Expand Down Expand Up @@ -113,22 +118,54 @@ func readConfigFile(filepath string) Config {
return decoded
}

func processFile(wg *sync.WaitGroup, lines chan chan string, config Config) {
func processInput(wg *sync.WaitGroup, input io.Reader, lines chan chan string, config Config) {
defer wg.Done()

r := bufio.NewReaderSize(os.Stdin, 2*1024*1024)
r := bufio.NewReaderSize(input, 2*1024*1024)
var nextLine string
insertStarted := false
for {
line, err := r.ReadString('\n')

if err == io.EOF {
// First, let's check for any non-EOF errors and break
if err != nil && err != io.EOF {
logrus.Error(err.Error())
break
}

if err != nil {
logrus.Error(err.Error())
break
// If the line is empty, just skip it
if len(line) == 0 {
if err == io.EOF {
break
}
continue
}

// clean up whitespace
line = strings.TrimSpace(line)

// Test if this is an INSERT query. We'll use this to determine whether we
// need to concatenate lines together when the query is spread across
// multiple lines instead of being on a single line
maybeInsert := strings.ToUpper(line[:6]) == "INSERT"
if maybeInsert {
insertStarted = true
}

// Now that we've detected this is an INSERT query, let's append the lines
// together to form a single line in the event this spans multiple lines
if insertStarted {
nextLine += line
}

lastCharacter := line[len(line)-1:]
if lastCharacter != ";" && insertStarted {
continue
}

// Let's reset
insertStarted = false

wg.Add(1)
ch := make(chan string)
lines <- ch
Expand All @@ -137,7 +174,14 @@ func processFile(wg *sync.WaitGroup, lines chan chan string, config Config) {
defer wg.Done()
line = processLine(line, config)
ch <- line
}(line)
}(nextLine)

// We wait until the very end to check for EOF because we may have reached
// EOF while `line` still has a value
if err == io.EOF {
logrus.Debug("Reached EOF, finished processing.")
break
}
}
}

Expand Down
33 changes: 28 additions & 5 deletions anonymize-mysqldump_test.go
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
package main

import (
"bytes"
"syreclabs.com/go/faker"
"testing"
)

var (
jsonConfig Config
commentsQuery = "INSERT INTO `wp_comments` VALUES (1,1,'A WordPress Commenter','[email protected]','https://wordpress.org/','','2019-06-12 00:59:19','2019-06-12 00:59:19','Hi, this is a comment.\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\nCommenter avatars come from <a href=\"https://gravatar.com\">Gravatar</a>.',0,'1','','',0,0);\n"
jsonConfig Config
multilineQuery = `INSERT INTO wp_usermeta VALUES
(1,1,'first_name','John'),(2,1,'last_name','Doe'),
(3,1,'foobar','bazquz'),
(4,1,'nickname','Jim'),
(5,1,'description','Lorum ipsum.');
`
multilineQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Nat'), (2, 1, 'last_name', 'Hermiston'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Treva'), (5, 1, 'description', 'Enim odio nihil.');\n"
commentsQuery = "INSERT INTO `wp_comments` VALUES (1,1,'A WordPress Commenter','[email protected]','https://wordpress.org/','','2019-06-12 00:59:19','2019-06-12 00:59:19','Hi, this is a comment.\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\nCommenter avatars come from <a href=\"https://gravatar.com\">Gravatar</a>.',0,'1','','',0,0);\n"
// Don't forget to escape the backslash in \n; otherwise it'll translate to a
// real newline and not pass the comparison test
commentsQueryRecompiled = "insert into wp_comments values (1, 1, 'kamren.ohara', 'michele_barton@example.net', 'http://ebert.com/korey_keeling', '', '2019-06-12 00:59:19', '2019-06-12 00:59:19', 'Hi, this is a comment.\\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\\nCommenter avatars come from <a href=\\\"https://gravatar.com\\\">Gravatar</a>.', 0, '1', '', '', 0, 0);\n"
commentsQueryRecompiled = "insert into wp_comments values (1, 1, 'sam_harvey', 'jillian@example.com', 'http://balistreriwiegand.name/sunny', '', '2019-06-12 00:59:19', '2019-06-12 00:59:19', 'Hi, this is a comment.\\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\\nCommenter avatars come from <a href=\\\"https://gravatar.com\\\">Gravatar</a>.', 0, '1', '', '', 0, 0);\n"
usersQuery = "INSERT INTO `wp_users` VALUES (1,'username','user_pass','username','[email protected]','','2019-06-12 00:59:19','',0,'username'),(2,'username','user_pass','username','[email protected]','http://notreal.com/username','2019-06-12 00:59:19','',0,'username');\n"
usersQueryRecompiled = "insert into wp_users values (1, 'treva_cremin', 'NjaK5HeMAMuv', 'hailey', 'bernice.heaney@example.net', '', '2019-06-12 00:59:19', '', 0, 'Kylie Rice'), (2, 'eduardo', 'J3JRQ4XoIxXX6A', 'albert.okeefe', 'brooke.hayes@example.net', 'http://pfannerstill.net/brando', '2019-06-12 00:59:19', '', 0, 'Ardella Jenkins PhD');\n"
usersQueryRecompiled = "insert into wp_users values (1, 'fatima.fisher', 'abOSwkVS', 'lillian', 'grover@example.net', '', '2019-06-12 00:59:19', '', 0, 'Retta Bailey'), (2, 'juwan.kassulke', 'zgtEQA3nm4Wlro', 'evalyn', 'camilla.hilll@example.org', 'http://dickensmurphy.info/ophelia', '2019-06-12 00:59:19', '', 0, 'Rick Fahey III');\n"
userMetaQuery = "INSERT INTO `wp_usermeta` VALUES (1,1,'first_name','John'),(2,1,'last_name','Doe'),(3,1,'foobar','bazquz'),(4,1,'nickname','Jim'),(5,1,'description','Lorum ipsum.'),(6,2,'first_name','Janet'),(7,2,'last_name','Doe'),(8,2,'foobar','bazquz'),(9,2,'nickname','Jane'),(10,2,'description','Lorum ipsum.');\n"
userMetaQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Stephania'), (2, 1, 'last_name', 'Hamill'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Noah'), (5, 1, 'description', 'Dolorum nostrum alias.'), (6, 2, 'first_name', 'Ed'), (7, 2, 'last_name', 'Koelpin'), (8, 2, 'foobar', 'bazquz'), (9, 2, 'nickname', 'Watson'), (10, 2, 'description', 'Qui voluptatum est.');\n"
userMetaQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Ed'), (2, 1, 'last_name', 'Koelpin'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Watson'), (5, 1, 'description', 'Qui voluptatum est.'), (6, 2, 'first_name', 'Olen'), (7, 2, 'last_name', 'Williamson'), (8, 2, 'foobar', 'bazquz'), (9, 2, 'nickname', 'Kamren'), (10, 2, 'description', 'Eveniet repellat in.');\n"
)

func init() {
Expand All @@ -30,6 +38,21 @@ func BenchmarkProcessLine(b *testing.B) {
}
}

func TestProcessFile(t *testing.T) {
input := bytes.NewBufferString(multilineQuery)

lines := setupAndProcessInput(jsonConfig, input)

var result string
for line := range lines {
result = <-line
}

if result != multilineQueryRecompiled {
t.Error("\nExpected:\n", multilineQueryRecompiled, "\nActual:\n", result)
}
}

func TestApplyConfigToQuery(t *testing.T) {

var tests = []struct {
Expand Down

0 comments on commit 61769d6

Please sign in to comment.