From 17b2563b696ef3a9f1eefc835fb22a0bd3e22e6f Mon Sep 17 00:00:00 2001
From: Joe Tsai
Date: Thu, 9 Sep 2021 16:36:27 -0700
Subject: [PATCH] Add NewNames and Value.NormalizeNames

The NewNames function produces a map of canonical names from a Go struct.
It can be passed to Value.NormalizeNames to normalize case-insensitive
matches of a JSON object name to the canonical name.
---
 names.go      | 153 +++++++++++++++++++++++++++++++++++++++++
 names_test.go | 186 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 339 insertions(+)
 create mode 100644 names.go
 create mode 100644 names_test.go

diff --git a/names.go b/names.go
new file mode 100644
index 0000000..227f126
--- /dev/null
+++ b/names.go
@@ -0,0 +1,153 @@
+// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package hujson
+
+import (
+	"reflect"
+	"strings"
+)
+
+// Names is a map of canonical JSON object names.
+// The value for each entry is another map of JSON object names to use
+// for any JSON sub-objects.
+//
+// As a special case, a map with only a single entry where the key is "*"
+// indicates that the sub-map of names is to be applied to all sub-objects.
+//
+// See the example for Value.NormalizeNames for more information.
+type Names map[string]Names
+
+// NewNames constructs a Names map for the provided type
+// as typically understood by the "encoding/json" package.
+//
+// See the example for Value.NormalizeNames for more information.
+func NewNames(t reflect.Type) Names {
+	// TODO(dsnet): Handle cycles in the type graph.
+	// TODO(dsnet): What happens when t implements json.Unmarshaler?
+	switch t.Kind() {
+	case reflect.Array, reflect.Slice, reflect.Ptr:
+		return NewNames(t.Elem())
+	case reflect.Map:
+		names := NewNames(t.Elem())
+		if len(names) == 0 {
+			return nil
+		}
+		return Names{"*": names}
+	case reflect.Struct:
+		names := make(Names)
+		for i := 0; i < t.NumField(); i++ {
+			sf := t.Field(i)
+			if sf.PkgPath != "" {
+				// TODO(dsnet): Technically, an embedded, unexported type with
+				// exported fields can have serializable fields.
+				// This almost never occurs in practice.
+				continue // unexported fields are ignored
+			}
+
+			// Derive JSON name from either the Go field name or `json` tag.
+			name := sf.Name
+			inlined := sf.Anonymous && mayIndirect(sf.Type).Kind() == reflect.Struct
+			switch tag := sf.Tag.Get("json"); tag {
+			case "":
+				break // do nothing
+			case "-":
+				continue // explicitly ignored field
+			default:
+				if i := strings.IndexByte(tag, ','); i >= 0 {
+					tag = tag[:i]
+				}
+				if tag != "" {
+					name = tag
+					inlined = false // explicitly named fields are never inlined
+				}
+			}
+
+			// If inlined, hoist all child names up to the parent.
+			// Otherwise, just insert the current name.
+			if inlined {
+				// TODO(dsnet): This does not properly handle name conflicts.
+				// However, conflicts rarely occur in practice.
+				// See https://github.com/golang/go/blob/aa4e0f528e1e018e2847decb549cfc5ac07ecf20/src/encoding/json/encode.go#L1352-L1378
+				for name, subNames := range NewNames(sf.Type) {
+					names[name] = subNames
+				}
+			} else {
+				names[name] = NewNames(sf.Type)
+			}
+		}
+		if len(names) == 0 {
+			return nil
+		}
+		return names
+	default:
+		return nil
+	}
+}
+
+// mayIndirect returns the element type if t is a pointer type;
+// otherwise it returns t unchanged. Only one level of pointer is removed.
+func mayIndirect(t reflect.Type) reflect.Type {
+	if t.Kind() == reflect.Ptr {
+		t = t.Elem()
+	}
+	return t
+}
+
+// NormalizeNames recursively iterates through v and replaces any JSON object
+// name that is a case-insensitive match with a name found in names,
+// with the canonical name found in names.
+//
+// See the example for Value.NormalizeNames for more information.
+func (v *Value) NormalizeNames(names Names) {
+	v.normalizeNames(names)
+	v.UpdateOffsets()
+}
+
+// normalizeNames is the recursive implementation of NormalizeNames.
+// It does not call UpdateOffsets; NormalizeNames does so once afterwards.
+func (v *Value) normalizeNames(names Names) {
+	if len(names) == 0 {
+		return
+	}
+	switch v2 := v.Value.(type) {
+	case *Object:
+		// If names is a map with only a "*" key,
+		// then apply the same subNames map to all map values.
+		if subNames, ok := names["*"]; ok && len(names) == 1 {
+			for i := range v2.Members {
+				v2.Members[i].Value.normalizeNames(subNames)
+			}
+			break
+		}
+
+		for i := range v2.Members {
+			name := v2.Members[i].Name.Value.(Literal).String()
+
+			// Fast-path: Exact match with names map.
+			subNames, ok := names[name]
+			if !ok {
+				// Slow-path: Case-insensitive match with names map.
+				var match string
+				for name2 := range names {
+					// Keep the lexicographically greatest match so that the
+					// result does not depend on map iteration order.
+					if (match == "" || match < name2) && strings.EqualFold(name, name2) {
+						match = name2
+					}
+				}
+				// If a case-insensitive match was found, update the name.
+				if match != "" {
+					v2.Members[i].Name.Value = String(match)
+					subNames = names[match]
+				}
+			}
+			v2.Members[i].Value.normalizeNames(subNames)
+		}
+	case *Array:
+		for i := range v2.Elements {
+			v2.Elements[i].normalizeNames(names)
+		}
+	}
+}
diff --git a/names_test.go b/names_test.go
new file mode 100644
index 0000000..27aafb0
--- /dev/null
+++ b/names_test.go
@@ -0,0 +1,186 @@
+// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package hujson
+
+import (
+	"fmt"
+	"log"
+	"reflect"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+// The "encoding/json" package unfortunately uses case-insensitive matching
+// when unmarshaling. For example, the following:
+//
+//	{"NAME": ...}
+//	{"nAmE": ...}
+//	{"name": ...}
+//	{"Name": ...}
+//
+// are all equivalent when unmarshaling into a Go struct like:
+//
+//	struct{ Name string }
+//
+// In order to conform some HuJSON value to consistently use the same set of
+// JSON object names, a Names map can be derived from Go struct type
+// and applied upon the HuJSON value using the Value.NormalizeNames method.
+func ExampleValue_NormalizeNames() {
+	type MyStruct struct {
+		Alpha int
+		Bravo []struct {
+			Foo int
+		} `json:"bravo_wavo"`
+		Charlie map[string]struct {
+			Fizz int `json:"fizzy_wizzy"`
+			Buzz int `json:",omitempty"`
+		}
+		Ignored    int `json:"-"`
+		unexported int
+	}
+
+	// Derive the set of canonical names from the Go struct type.
+	names := NewNames(reflect.TypeOf(MyStruct{}))
+	// Verify that the derived names match what we expect.
+	gotNames := names
+	wantNames := Names{
+		"Alpha": nil, // name comes from Go struct field
+		"bravo_wavo": { // name comes from `json` tag
+			"Foo": nil, // name comes from Go struct field
+		},
+		"Charlie": { // name comes from Go struct field
+			"*": { // implies that all JSON object members use the same set of sub-names
+				"fizzy_wizzy": nil, // name comes from `json` tag
+				"Buzz":        nil, // name comes from Go struct field
+			},
+		},
+	}
+	if diff := cmp.Diff(wantNames, gotNames); diff != "" {
+		log.Fatalf("NewNames mismatch (-want +got):\n%s", diff)
+	}
+
+	// Parse some HuJSON input with strangely formatted names.
+	v, err := Parse([]byte(`{
+	"AlPhA": 0,
+	"BRAVO_WAVO": [
+		{"FOO": 0},
+		{"fOo": 1},
+		{"Foo": 2},
+	],
+	"charlie": {
+		"kEy": {"FIZZY_WIZZY": 0},
+		"KeY": {"bUzZ": 1},
+	},
+}`))
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Conform JSON object names in the HuJSON value to the canonical names.
+	v.NormalizeNames(gotNames)
+	fmt.Println(v)
+
+	// Output:
+	// {
+	// 	"Alpha": 0,
+	// 	"bravo_wavo": [
+	// 		{"Foo": 0},
+	// 		{"Foo": 1},
+	// 		{"Foo": 2},
+	// 	],
+	// 	"Charlie": {
+	// 		"kEy": {"fizzy_wizzy": 0},
+	// 		"KeY": {"Buzz": 1},
+	// 	},
+	// }
+}
+
+func TestNormalizeNames(t *testing.T) {
+	type MyStruct struct {
+		GoName   int
+		JSONName int `json:"json_name"`
+	}
+
+	tests := []struct {
+		typ       interface{}
+		wantNames Names
+		in        string
+		wantOut   string
+	}{{
+		typ:       0,
+		wantNames: nil,
+		in:        `{"hello":"goodbye"}`,
+		wantOut:   `{"hello":"goodbye"}`,
+	}, {
+		typ:       new(int),
+		wantNames: nil,
+		in:        `{"hello":"goodbye"}`,
+		wantOut:   `{"hello":"goodbye"}`,
+	}, {
+		typ: struct {
+			GoName1    int
+			GoName2    int `json:",omitempty"`
+			JSONName   int `json:"json_name"`
+			Ignored    int `json:"-"`
+			unexported int `json:"fake_name"`
+		}{},
+		wantNames: Names{"GoName1": nil, "GoName2": nil, "json_name": nil},
+		in:        `{"goname1":0,"goname2":0,"JSON_NAME":0,"JSONNAME":0}`,
+		wantOut:   `{"GoName1":0,"GoName2":0,"json_name":0,"JSONNAME":0}`,
+	}, {
+		typ: struct {
+			M *[]map[int][]map[string][]struct {
+				F int `json:"field"`
+			}
+		}{},
+		wantNames: Names{"M": {"*": {"*": {"field": nil}}}},
+		in:        `{"m":[{"hello":[{"goodbye":[{"FIELD":0}]}]}]}`,
+		wantOut:   `{"M":[{"hello":[{"goodbye":[{"field":0}]}]}]}`,
+	}, {
+		typ: struct {
+			M map[string]struct{}
+		}{},
+		wantNames: Names{"M": nil},
+	}, {
+		typ: struct {
+			MyStruct
+			int
+		}{},
+		wantNames: Names{"GoName": nil, "json_name": nil},
+	}, {
+		typ: struct {
+			*MyStruct
+			*int
+		}{},
+		wantNames: Names{"GoName": nil, "json_name": nil},
+	}, {
+		typ: struct {
+			MyStruct `json:"my_struct"`
+		}{},
+		wantNames: Names{"my_struct": {"GoName": nil, "json_name": nil}},
+	}}
+
+	for _, tt := range tests {
+		t.Run("", func(t *testing.T) {
+			gotNames := NewNames(reflect.TypeOf(tt.typ))
+			if diff := cmp.Diff(tt.wantNames, gotNames); diff != "" {
+				t.Errorf("NewNames(%T) mismatch (-want +got):\n%s", tt.typ, diff)
+			}
+
+			if tt.in == "" {
+				return
+			}
+			v, err := Parse([]byte(tt.in))
+			if err != nil {
+				t.Fatalf("Parse error: %v", err)
+			}
+			v.NormalizeNames(gotNames)
+			gotOut := v.String()
+			if diff := cmp.Diff(tt.wantOut, gotOut); diff != "" {
+				t.Errorf("v.NormalizeNames(%T) mismatch (-want +got):\n%s", tt.typ, diff)
+			}
+		})
+	}
+}