diff --git a/go.mod b/go.mod index 7ede21f..fd4a81f 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ module github.com/buger/jsonparser -go 1.13 +go 1.18 diff --git a/parser.go b/parser.go index 76ec02d..55a09c2 100644 --- a/parser.go +++ b/parser.go @@ -380,6 +380,258 @@ func sameTree(p1, p2 []string) bool { const stackArraySize = 128 +func getRawValue(data []byte, offset int) ([]byte, int, error) { + endOffset := offset + + if data[offset] == '"' { + if idx, _ := stringEnd(data[offset+1:]); idx != -1 { + endOffset += idx + 1 + } else { + return nil, offset, MalformedStringError + } + } else if data[offset] == '[' { // if array value + // break label, for stopping nested loops + endOffset = blockEnd(data[offset:], '[', ']') + + if endOffset == -1 { + return nil, offset, MalformedArrayError + } + + endOffset += offset + } else if data[offset] == '{' { // if object value + // break label, for stopping nested loops + endOffset = blockEnd(data[offset:], '{', '}') + + if endOffset == -1 { + return nil, offset, MalformedObjectError + } + + endOffset += offset + } else { + // Number, Boolean or None + end := tokenEnd(data[endOffset:]) + + if end == -1 { + return nil, offset, MalformedValueError + } + + endOffset += end + } + // raw json can't be empty value + if endOffset <= offset { + return nil, offset, MalformedValueError + } + return data[offset:endOffset], endOffset, nil +} + +func internalRawGet(data []byte) (value []byte, err error) { + // Go to closest value + nO := nextToken(data) + if nO == -1 { + return nil, MalformedJsonError + } + + value, _, err = getRawValue(data, nO) + if err != nil { + return value, err + } + + return value[:len(value):len(value)], nil +} + +func EachRawKey(data []byte, cb func(int, []byte, error), paths ...[]string) int { + var x struct{} + var level, pathsMatched, i int + ln := len(data) + + pathFlags := make([]bool, stackArraySize)[:] + if len(paths) > cap(pathFlags) { + pathFlags = make([]bool, len(paths))[:] + } + pathFlags = pathFlags[0:len(paths)] + + var maxPath int + for _, p := range paths { + if len(p) > maxPath { + maxPath = len(p) + } + } + + pathsBuf := make([]string, stackArraySize)[:] + if maxPath > cap(pathsBuf) { + pathsBuf = make([]string, maxPath)[:] + } + pathsBuf = pathsBuf[0:maxPath] + + for i < ln { + switch data[i] { + case '"': + i++ + keyBegin := i + + strEnd, keyEscaped := stringEnd(data[i:]) + if strEnd == -1 { + return -1 + } + i += strEnd + + keyEnd := i - 1 + + valueOffset := nextToken(data[i:]) + if valueOffset == -1 { + return -1 + } + + i += valueOffset + + // if string is a key, and key level match + if data[i] == ':' { + match := -1 + key := data[keyBegin:keyEnd] + + // for unescape: if there are no escape sequences, this is cheap; if there are, it is a + // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize + var keyUnesc []byte + if !keyEscaped { + keyUnesc = key + } else { + var stackbuf [unescapeStackBufSize]byte + if ku, err := Unescape(key, stackbuf[:]); err != nil { + return -1 + } else { + keyUnesc = ku + } + } + + if maxPath >= level { + if level < 1 { + cb(-1, nil, MalformedJsonError) + return -1 + } + + pathsBuf[level-1] = bytesToString(&keyUnesc) + for pi, p := range paths { + if len(p) != level || pathFlags[pi] || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) { + continue + } + + match = pi + + pathsMatched++ + pathFlags[pi] = true + + v, e := internalRawGet(data[i+1:]) + cb(pi, v, e) + + if pathsMatched == len(paths) { + break + } + } + if pathsMatched == len(paths) { + return i + } + } + + if match == -1 { + tokenOffset := nextToken(data[i+1:]) + i += tokenOffset + + if data[i] == '{' { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + 1 + } + } + + if i < ln { + switch data[i] { + case '{', '}', '[', '"': + i-- + } + } + } else { + i-- + } + case '{': + level++ + case '}': + level-- + case '[': + var ok bool + arrIdxFlags := make(map[int]struct{}) + + pIdxFlags := make([]bool, stackArraySize)[:] + if len(paths) > cap(pIdxFlags) { + pIdxFlags = make([]bool, len(paths))[:] + } + pIdxFlags = pIdxFlags[0:len(paths)] + + if level < 0 { + cb(-1, nil, MalformedJsonError) + return -1 + } + + for pi, p := range paths { + if len(p) < level+1 || pathFlags[pi] || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) { + continue + } + if len(p[level]) >= 2 { + aIdx, _ := strconv.Atoi(p[level][1 : len(p[level])-1]) + arrIdxFlags[aIdx] = x + pIdxFlags[pi] = true + } + } + + if len(arrIdxFlags) > 0 { + level++ + + var curIdx int + arrOff, _ := ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) { + if _, ok = arrIdxFlags[curIdx]; ok { + for pi, p := range paths { + if pIdxFlags[pi] { + aIdx, _ := strconv.Atoi(p[level-1][1 : len(p[level-1])-1]) + + if curIdx == aIdx { + of := searchKeys(value, p[level:]...) + + pathsMatched++ + pathFlags[pi] = true + + if of != -1 { + v, er := internalRawGet(value[of:]) + cb(pi, v, er) + } + } + } + } + } + + curIdx += 1 + }) + + if pathsMatched == len(paths) { + return i + } + + i += arrOff - 1 + } else { + // Do not search for keys inside arrays + if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 { + return -1 + } else { + i += arraySkip - 1 + } + } + case ']': + level-- + } + + i++ + } + + return -1 +} + func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int { var x struct{} var level, pathsMatched, i int @@ -707,12 +959,10 @@ func WriteToBuffer(buffer []byte, str string) int { } /* - Del - Receives existing data structure, path to delete. Returns: `data` - return modified data - */ func Delete(data []byte, keys ...string) []byte { lk := len(keys) @@ -793,13 +1043,11 @@ func Delete(data []byte, keys ...string) []byte { } /* - Set - Receives existing data structure, path to set, and data to set at that key. Returns: `value` - modified byte array `err` - On any parsing error - */ func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) { // ensure keys are set @@ -984,6 +1232,29 @@ func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType, return value[:len(value):len(value)], dataType, offset, endOffset, nil } +func GetRaw(data []byte, keys ...string) (value []byte, offset int, err error) { + if len(keys) > 0 { + if offset = searchKeys(data, keys...); offset == -1 { + return nil, -1, KeyPathNotFoundError + } + } + + // Go to closest value + nO := nextToken(data[offset:]) + if nO == -1 { + return nil, -1, MalformedJsonError + } + + offset += nO + + value, offset, err = getRawValue(data, offset) + if err != nil { + return value, offset, err + } + + return value[:len(value):len(value)], offset, nil +} + // ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`. func ArrayEach(data []byte, cb func(value []byte, dataType ValueType, offset int, err error), keys ...string) (offset int, err error) { if len(data) == 0 { diff --git a/parser_test.go b/parser_test.go index 7036feb..588c787 100644 --- a/parser_test.go +++ b/parser_test.go @@ -880,6 +880,441 @@ var getTests = []GetTest{ }, } +var getRawTests = []GetTest{ + // Trivial tests + { + desc: "read string", + json: `""`, + isFound: true, + data: `""`, + }, + { + desc: "read number", + json: `0`, + isFound: true, + data: `0`, + }, + { + desc: "read object", + json: `{}`, + isFound: true, + data: `{}`, + }, + { + desc: "read array", + json: `[]`, + isFound: true, + data: `[]`, + }, + { + desc: "read boolean", + json: `true`, + isFound: true, + data: `true`, + }, + + // Found key tests + { + desc: "handling multiple nested keys with same name", + json: `{"a":[{"b":1},{"b":2},3],"c":{"c":[1,2]}} }`, + path: []string{"c", "c"}, + isFound: true, + data: `[1,2]`, + }, + { + desc: "read basic key", + json: `{"a":"b"}`, + path: []string{"a"}, + isFound: true, + data: `"b"`, + }, + { + desc: "read basic key with space", + json: `{"a": "b"}`, + path: []string{"a"}, + isFound: true, + data: `"b"`, + }, + { + desc: "read composite key", + json: `{"a": { "b":{"c":"d" }}}`, + path: []string{"a", "b", "c"}, + isFound: true, + data: `"d"`, + }, + { + desc: `read numberic value as string`, + json: `{"a": "b", "c": 1}`, + path: []string{"c"}, + isFound: true, + data: `1`, + }, + { + desc: `handle multiple nested keys with same name`, + json: `{"a":[{"b":1},{"b":2},3],"c":{"c":[1,2]}} }`, + path: []string{"c", "c"}, + isFound: true, + data: `[1,2]`, + }, + { + desc: `read string values with quotes`, + json: `{"a": "string\"with\"quotes"}`, + path: []string{"a"}, + isFound: true, + data: `"string\"with\"quotes"`, + }, + { + desc: `read object`, + json: `{"a": { "b":{"c":"d" }}}`, + path: []string{"a", "b"}, + isFound: true, + data: `{"c":"d" }`, + }, + { + desc: `empty path`, + json: `{"c":"d" }`, + path: []string{}, + isFound: true, + data: `{"c":"d" }`, + }, + { + desc: `formatted JSON value`, + json: "{\n \"a\": \"b\"\n}", + path: []string{"a"}, + isFound: true, + data: `"b"`, + }, + { + desc: `formatted JSON value 2`, + json: "{\n \"a\":\n {\n\"b\":\n {\"c\":\"d\",\n\"e\": \"f\"}\n}\n}", + path: []string{"a", "b"}, + isFound: true, + data: "{\"c\":\"d\",\n\"e\": \"f\"}", + }, + { + desc: `whitespace`, + json: " \n\r\t{ \n\r\t\"whitespace\" \n\r\t: \n\r\t333 \n\r\t} \n\r\t", + path: []string{"whitespace"}, + isFound: true, + data: "333", + }, + { + desc: `escaped backslash quote`, + json: `{"a": "\\\""}`, + path: []string{"a"}, + isFound: true, + data: `"\\\""`, + }, + { + desc: `unescaped backslash quote`, + json: `{"a": "\\"}`, + path: []string{"a"}, + isFound: true, + data: `"\\"`, + }, + { + desc: `unicode in JSON`, + json: `{"a": "15°C"}`, + path: []string{"a"}, + isFound: true, + data: `"15°C"`, + }, + { + desc: `no padding + nested`, + json: `{"a":{"a":"1"},"b":2}`, + path: []string{"b"}, + isFound: true, + data: `2`, + }, + { + desc: `no padding + nested + array`, + json: `{"a":{"b":[1,2]},"c":3}`, + path: []string{"c"}, + isFound: true, + data: `3`, + }, + { + desc: `empty key`, + json: `{"":{"":{"":true}}}`, + path: []string{"", "", ""}, + isFound: true, + data: `true`, + }, + + // Escaped key tests + { + desc: `key with simple escape`, + json: `{"a\\b":1}`, + path: []string{"a\\b"}, + isFound: true, + data: `1`, + }, + { + desc: `key and value with whitespace escapes`, + json: `{"key\b\f\n\r\tkey":"value\b\f\n\r\tvalue"}`, + path: []string{"key\b\f\n\r\tkey"}, + isFound: true, + data: `"value\b\f\n\r\tvalue"`, // value is not unescaped since this is Get(), but the key should work correctly + }, + { + desc: `key with Unicode escape`, + json: `{"a\u00B0b":1}`, + path: []string{"a\u00B0b"}, + isFound: true, + data: `1`, + }, + { + desc: `key with complex escape`, + json: `{"a\uD83D\uDE03b":1}`, + path: []string{"a\U0001F603b"}, + isFound: true, + data: `1`, + }, + + { // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance + desc: `malformed with trailing whitespace`, + json: `{"a":1 `, + path: []string{"a"}, + isFound: true, + data: `1`, + }, + { // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance + desc: `malformed with wrong closing bracket`, + json: `{"a":1]`, + path: []string{"a"}, + isFound: true, + data: `1`, + }, + + // Not found key tests + { + desc: `empty input`, + json: ``, + path: []string{"a"}, + isFound: false, + }, + { + desc: "non-existent key 1", + json: `{"a":"b"}`, + path: []string{"c"}, + isFound: false, + }, + { + desc: "non-existent key 2", + json: `{"a":"b"}`, + path: []string{"b"}, + isFound: false, + }, + { + desc: "non-existent key 3", + json: `{"aa":"b"}`, + path: []string{"a"}, + isFound: false, + }, + { + desc: "apply scope of parent when search for nested key", + json: `{"a": { "b": 1}, "c": 2 }`, + path: []string{"a", "b", "c"}, + isFound: false, + }, + { + desc: `apply scope to key level`, + json: `{"a": { "b": 1}, "c": 2 }`, + path: []string{"b"}, + isFound: false, + }, + { + desc: `handle escaped quote in key name in JSON`, + json: `{"key\"key": 1}`, + path: []string{"key"}, + isFound: false, + }, + { + desc: "handling multiple keys with different name", + json: `{"a":{"a":1},"b":{"a":3,"c":[1,2]}}`, + path: []string{"a", "c"}, + isFound: false, + }, + { + desc: "handling nested json", + json: `{"a":{"b":{"c":1},"d":4}}`, + path: []string{"a", "d"}, + isFound: true, + data: `4`, + }, + { // Issue #148 + desc: `missing key in different key same level`, + json: `{"s":"s","ic":2,"r":{"o":"invalid"}}`, + path: []string{"ic", "o"}, + isFound: false, + }, + + // Error/invalid tests + { + desc: `handle escaped quote in key name in JSON`, + json: `{"key\"key": 1}`, + path: []string{"key"}, + isFound: false, + }, + { + desc: `missing closing brace, but can still find key`, + json: `{"a":"b"`, + path: []string{"a"}, + isFound: true, + data: `"b"`, + }, + { + desc: `missing value closing quote`, + json: `{"a":"b`, + path: []string{"a"}, + isErr: true, + }, + { + desc: `missing value closing curly brace`, + json: `{"a": { "b": "c"`, + path: []string{"a"}, + isErr: true, + }, + { + desc: `missing value closing square bracket`, + json: `{"a": [1, 2, 3 }`, + path: []string{"a"}, + isErr: true, + }, + { + desc: `missing value 1`, + json: `{"a":`, + path: []string{"a"}, + isErr: true, + }, + { + desc: `missing value 2`, + json: `{"a": `, + path: []string{"a"}, + isErr: true, + }, + { + desc: `missing value 3`, + json: `{"a":}`, + path: []string{"a"}, + isErr: true, + }, + { + desc: `malformed array (no closing brace)`, + json: `{"a":[, "b":123}`, + path: []string{"b"}, + isFound: false, + }, + { // Issue #81 + desc: `missing key in object in array`, + json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`, + path: []string{"p", "a", "[0]", "x"}, + isFound: false, + }, + { // Issue #81 counter test + desc: `existing key in object in array`, + json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`, + path: []string{"p", "a", "[0]", "u"}, + isFound: true, + data: `"abc"`, + }, + { // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance + desc: "malformed key (followed by comma followed by colon)", + json: `{"a",:1}`, + path: []string{"a"}, + isFound: false, + }, + { // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) + desc: "malformed 'colon chain', lookup first string", + json: `{"a":"b":"c"}`, + path: []string{"a"}, + isFound: true, + data: `"b"`, + }, + { // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) + desc: "malformed 'colon chain', lookup second string", + json: `{"a":"b":"c"}`, + path: []string{"b"}, + isFound: true, + data: `"c"`, + }, + // Array index paths + { + desc: "last key in path is index", + json: `{"a":[{"b":1},{"b":"2"}, 3],"c":{"c":[1,2]}}`, + path: []string{"a", "[1]"}, + isFound: true, + data: `{"b":"2"}`, + }, + { + desc: "get string from array", + json: `{"a":[{"b":1},"foo", 3],"c":{"c":[1,2]}}`, + path: []string{"a", "[1]"}, + isFound: true, + data: `"foo"`, + }, + { + desc: "key in path is index", + json: `{"a":[{"b":"1"},{"b":"2"},3],"c":{"c":[1,2]}}`, + path: []string{"a", "[0]", "b"}, + isFound: true, + data: `"1"`, + }, + { + desc: "last key in path is an index to value in array (formatted json)", + json: `{ + "a": [ + { + "b": 1 + }, + {"b":"2"}, + 3 + ], + "c": { + "c": [ + 1, + 2 + ] + } + }`, + path: []string{"a", "[1]"}, + isFound: true, + data: `{"b":"2"}`, + }, + { + desc: "key in path is index (formatted json)", + json: `{ + "a": [ + {"b": 1}, + {"b": "2"}, + 3 + ], + "c": { + "c": [ + 1, + 2 + ] + } + }`, + path: []string{"a", "[0]", "b"}, + isFound: true, + data: `1`, + }, + { + // Issue #178: Crash in searchKeys + desc: `invalid json`, + json: `{{{"":`, + path: []string{"a", "b"}, + isFound: false, + }, + { + desc: `opening brace instead of closing and without key`, + json: `{"a":1{`, + path: []string{"b"}, + isFound: false, + }, +} + var getIntTests = []GetTest{ { desc: `read numeric value as number`, @@ -1324,6 +1759,19 @@ func TestGet(t *testing.T) { ) } +func TestGetRaw(t *testing.T) { + runGetTests(t, "GetRaw()", getRawTests, + func(test GetTest) (value interface{}, dataType ValueType, err error) { + value, _, err = GetRaw([]byte(test.json), test.path...) + return + }, + func(test GetTest, value interface{}) (bool, interface{}) { + expected := []byte(test.data.(string)) + return bytes.Equal(expected, value.([]byte)), expected + }, + ) +} + func TestGetString(t *testing.T) { runGetTests(t, "GetString()", getStringTests, func(test GetTest) (value interface{}, dataType ValueType, err error) { @@ -1787,6 +2235,87 @@ func TestEachKey(t *testing.T) { } } +func TestEachRawKey(t *testing.T) { + paths := [][]string{ + {"name"}, + {"order"}, + {"nested", "a"}, + {"nested", "b"}, + {"nested2", "a"}, + {"nested", "nested3", "b"}, + {"arr", "[1]", "b"}, + {"arrInt", "[3]"}, + {"arrInt", "[5]"}, // Should not find last key + {"nested"}, + {"arr", "["}, // issue#177 Invalid arguments + {"a\n", "b\n"}, // issue#165 + {"nested", "b"}, // Should find repeated key + } + + keysFound := 0 + + EachRawKey(testJson, func(idx int, value []byte, err error) { + keysFound++ + + switch idx { + case 0: + if string(value) != `"Name"` { + t.Error("Should find 1 key", string(value)) + } + case 1: + if string(value) != `"Order"` { + t.Errorf("Should find 2 key") + } + case 2: + if string(value) != `"test"` { + t.Errorf("Should find 3 key") + } + case 3: + if string(value) != "2" { + t.Errorf("Should find 4 key") + } + case 4: + if string(value) != `"test2"` { + t.Error("Should find 5 key", string(value)) + } + case 5: + if string(value) != "4" { + t.Errorf("Should find 6 key") + } + case 6: + if string(value) != "2" { + t.Errorf("Should find 7 key") + } + case 7: + if string(value) != "4" { + t.Error("Should find 8 key", string(value)) + } + case 8: + t.Errorf("Found key #8 that should not be found") + case 9: + if string(value) != `{"a":"test", "b":2, "nested3":{"a":"test3","b":4}, "c": "unknown"}` { + t.Error("Should find 9 key", string(value)) + } + case 10: + t.Errorf("Found key #10 that should not be found") + case 11: + if string(value) != "99" { + t.Error("Should find 10 key", string(value)) + } + case 12: + if string(value) != "2" { + t.Errorf("Should find 11 key") + } + default: + t.Errorf("Should find only 10 keys, got %v key", idx) + } + }, paths...) + + if keysFound != 11 { + t.Errorf("Should find 11 keys: %d", keysFound) + } +} + type ParseTest struct { in string intype ValueType