Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MB-58034: Custom Date Time parsers not being applied on query object #1860

Merged
merged 16 commits into from
Sep 7, 2023
Merged
40 changes: 35 additions & 5 deletions mapping/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ import (
// are used. To disable this automatic handling, set
// Dynamic to false.
type DocumentMapping struct {
Enabled bool `json:"enabled"`
Dynamic bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
Fields []*FieldMapping `json:"fields,omitempty"`
DefaultAnalyzer string `json:"default_analyzer,omitempty"`
Enabled bool `json:"enabled"`
Dynamic bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
Fields []*FieldMapping `json:"fields,omitempty"`
DefaultAnalyzer string `json:"default_analyzer,omitempty"`
DefaultDateTimeParser string `json:"default_date_time_parser,omitempty"`

// StructTagKey overrides "json" when looking for field names in struct tags
StructTagKey string `json:"struct_tag_key,omitempty"`
Expand Down Expand Up @@ -96,6 +97,14 @@ func (dm *DocumentMapping) analyzerNameForPath(path string) string {
return ""
}

// dateTimeParserForPath reports the DateFormat of the field mapping that
// fieldDescribedByPath resolves for path. It returns the empty string when
// no field mapping is found for the path.
func (dm *DocumentMapping) dateTimeParserForPath(path string) string {
	if fm := dm.fieldDescribedByPath(path); fm != nil {
		return fm.DateFormat
	}
	return ""
}

func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
pathElements := decodePath(path)
if len(pathElements) > 1 {
Expand Down Expand Up @@ -266,6 +275,11 @@ func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
if err != nil {
return err
}
case "default_datetime_parser":
err := json.Unmarshal(v, &dm.DefaultDateTimeParser)
if err != nil {
return err
}
case "properties":
err := json.Unmarshal(v, &dm.Properties)
if err != nil {
Expand Down Expand Up @@ -309,6 +323,22 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
return rv
}

// defaultDateTimeParser descends through Properties following path and
// returns the DefaultDateTimeParser of the deepest mapping on that path
// which sets a non-empty one, starting from dm's own value. The descent
// stops at the first path element that has no entry in Properties, and the
// result may be empty when no mapping along the way sets a parser.
func (dm *DocumentMapping) defaultDateTimeParser(path []string) string {
	parser := dm.DefaultDateTimeParser
	node := dm
	for i := 0; i < len(path); i++ {
		child, found := node.Properties[path[i]]
		if !found {
			// Nothing mapped below this point; keep what we inherited so far.
			return parser
		}
		node = child
		if name := node.DefaultDateTimeParser; name != "" {
			// A deeper mapping overrides the parser inherited from above.
			parser = name
		}
	}
	return parser
}

func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
// allow default "json" tag to be overridden
structTagKey := dm.StructTagKey
Expand Down
40 changes: 32 additions & 8 deletions mapping/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package mapping
import (
"encoding/json"
"fmt"

index "github.com/blevesearch/bleve_index_api"

CascadingRadium marked this conversation as resolved.
Show resolved Hide resolved
"github.com/blevesearch/bleve/v2/analysis"
Expand Down Expand Up @@ -417,20 +418,43 @@ func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimePa
return dateTimeParser
}

func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {

func (im *IndexMappingImpl) DatetimeParserNameForPath(path string) string {
// first we look for explicit mapping on the field
for _, docMapping := range im.TypeMapping {
pathMapping, _ := docMapping.documentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != "" {
return pathMapping.Fields[0].Analyzer
}
dateTimeParser := docMapping.dateTimeParserForPath(path)
if dateTimeParser != "" {
return dateTimeParser
}
}

// now try the default mapping
pathMapping, _ := im.DefaultMapping.documentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].DateFormat != "" {
return pathMapping.Fields[0].DateFormat
}
}
}

// next we will try default date-time parsers for the path
pathDecoded := decodePath(path)
for _, docMapping := range im.TypeMapping {
if docMapping.Enabled {
rv := docMapping.defaultDateTimeParser(pathDecoded)
if rv != "" {
return rv
}
}
}
// now the default date-time parser for the default mapping
if im.DefaultMapping.Enabled {
rv := im.DefaultMapping.defaultDateTimeParser(pathDecoded)
if rv != "" {
return rv
}
}

return im.DefaultDateTimeParser
}

Expand Down
1 change: 1 addition & 0 deletions mapping/mapping.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ type IndexMapping interface {
MapDocument(doc *document.Document, data interface{}) error
Validate() error

DatetimeParserNameForPath(path string) string
DateTimeParserNamed(name string) analysis.DateTimeParser

DefaultSearchField() string
Expand Down
111 changes: 97 additions & 14 deletions search/query/date_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ import (
index "github.com/blevesearch/bleve_index_api"
)

// QueryDateTimeParser controls the default query date time parser
// QueryDateTimeParser controls the default query date time parser in User Path.
CascadingRadium marked this conversation as resolved.
Show resolved Hide resolved
var QueryDateTimeParser = optional.Name

// QueryDateTimeFormat controls the format when Marshaling to JSON
// QueryDateTimeFormat controls the format when Marshaling to JSON in User Path.
var QueryDateTimeFormat = time.RFC3339

var cache = registry.NewCache()
Expand Down Expand Up @@ -68,6 +68,7 @@ func (t *BleveQueryTime) MarshalJSON() ([]byte, error) {
}

func (t *BleveQueryTime) UnmarshalJSON(data []byte) error {
// called in the User Path where we can use the default date time parser.
var timeString string
err := json.Unmarshal(data, &timeString)
if err != nil {
Expand All @@ -84,19 +85,74 @@ func (t *BleveQueryTime) UnmarshalJSON(data []byte) error {
return nil
}

func DateRangeUnmarshal(input []byte, obj *DateRangeQuery) error {
CascadingRadium marked this conversation as resolved.
Show resolved Hide resolved
// Only called in ParseQuery path, since we do not know the date time parser.
var objmap map[string]interface{}
err := json.Unmarshal(input, &objmap)
if err != nil {
return err
}
if objmap["start"] != nil {
rawStart, canConvert := objmap["start"].(string)
if !canConvert {
return fmt.Errorf("invalid start")
}
obj.rawStart = rawStart
}
if objmap["end"] != nil {
rawEnd, canConvert := objmap["end"].(string)
if !canConvert {
return fmt.Errorf("invalid end")
}
obj.rawEnd = rawEnd
}
if objmap["inclusive_start"] != nil {
inclusiveStart, canConvert := objmap["inclusive_start"].(bool)
if !canConvert {
return fmt.Errorf("invalid inclusive_start")
}
obj.InclusiveStart = &inclusiveStart
}
if objmap["inclusive_end"] != nil {
inclusiveEnd, canConvert := objmap["inclusive_end"].(bool)
if !canConvert {
return fmt.Errorf("invalid inclusive_end")
}
obj.InclusiveEnd = &inclusiveEnd
}
if objmap["boost"] != nil {
boost, canConvert := objmap["boost"].(float64)
if !canConvert {
return fmt.Errorf("invalid boost")
}
boostVal := Boost(boost)
obj.BoostVal = &boostVal
}
if objmap["field"] != nil {
fieldVal, canConvert := objmap["field"].(string)
if !canConvert {
return fmt.Errorf("invalid field")
}
obj.FieldVal = fieldVal
}
return nil
}

// DateRangeQuery describes a search over a range of date values on a field.
//
// Endpoints can be carried in two forms: Start/End hold them as time values,
// while rawStart/rawEnd hold the endpoint strings exactly as received by
// DateRangeUnmarshal. Searcher converts non-empty raw endpoints into
// Start/End using the date time parser the index mapping resolves for the
// queried field.
type DateRangeQuery struct {
// Start and End are the range endpoints as time values.
Start BleveQueryTime `json:"start,omitempty"`
End BleveQueryTime `json:"end,omitempty"`
// InclusiveStart and InclusiveEnd are optional endpoint-inclusion flags;
// Searcher forwards them unchanged to the numeric range searcher.
InclusiveStart *bool `json:"inclusive_start,omitempty"`
InclusiveEnd *bool `json:"inclusive_end,omitempty"`
// FieldVal names the field to search; when it is empty Searcher falls back
// to the mapping's DefaultSearchField().
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; Searcher passes BoostVal.Value() on.
BoostVal *Boost `json:"boost,omitempty"`
// rawStart and rawEnd are the unparsed endpoint strings stored by
// DateRangeUnmarshal. They are excluded from JSON encoding.
rawStart string `json:"-"`
rawEnd string `json:"-"`
}

// NewDateRangeQuery creates a new Query for ranges
// of date values.
// Date strings are parsed using the DateTimeParser configured in the
// top-level config.QueryDateTimeParser
// top-level config.QueryDateTimeParser
// Either, but not both endpoints can be nil.
func NewDateRangeQuery(start, end time.Time) *DateRangeQuery {
return NewDateRangeInclusiveQuery(start, end, nil, nil)
Expand All @@ -105,7 +161,7 @@ func NewDateRangeQuery(start, end time.Time) *DateRangeQuery {
// NewDateRangeInclusiveQuery creates a new Query for ranges
// of date values.
// Date strings are parsed using the DateTimeParser configured in the
// top-level config.QueryDateTimeParser
// top-level config.QueryDateTimeParser
// Either, but not both endpoints can be nil.
// startInclusive and endInclusive control inclusion of the endpoints.
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *DateRangeQuery {
Expand Down Expand Up @@ -135,16 +191,33 @@ func (q *DateRangeQuery) Field() string {
}

func (q *DateRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
min, max, err := q.parseEndpoints()
if err != nil {
return nil, err
}

field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}

if q.rawStart != "" || q.rawEnd != "" {
CascadingRadium marked this conversation as resolved.
Show resolved Hide resolved
// ParseQuery path since at least one of rawStart and rawEnd is not empty
// parse rawStart and rawEnd to time.Time objects
var err error
dateTimeParserName := m.DatetimeParserNameForPath(field)
dateTimeParser := m.DateTimeParserNamed(dateTimeParserName)
if q.rawStart != "" {
q.Start.Time, err = dateTimeParser.ParseDateTime(q.rawStart)
if err != nil {
return nil, fmt.Errorf("%v, date time parser name: %s", err, dateTimeParserName)
}
}
if q.rawEnd != "" {
q.End.Time, err = dateTimeParser.ParseDateTime(q.rawEnd)
if err != nil {
return nil, fmt.Errorf("%v, date time parser name: %s", err, dateTimeParserName)
}
}
}
min, max, err := q.parseEndpoints()
if err != nil {
return nil, err
}
return searcher.NewNumericRangeSearcher(ctx, i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options)
}

Expand Down Expand Up @@ -172,13 +245,23 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
}

func (q *DateRangeQuery) Validate() error {
// First test for User path
if q.Start.IsZero() && q.End.IsZero() {
return fmt.Errorf("must specify start or end")
// Test for ParseQuery path
if q.rawStart == "" && q.rawEnd == "" {
// Really invalid now
return fmt.Errorf("date range query must specify at least one of start/end")
}
}
_, _, err := q.parseEndpoints()
if err != nil {
return err
if !q.Start.IsZero() || !q.End.IsZero() {
// User path
_, _, err := q.parseEndpoints()
if err != nil {
return err
}
}
// Do not validate endpoints for ParseQuery path since we do not know the date time parser
// Instead validate in the Searcher, where we get the index mapping.
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion search/query/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ func ParseQuery(input []byte) (Query, error) {
_, hasEnd := tmp["end"]
if hasStart || hasEnd {
var rv DateRangeQuery
err := json.Unmarshal(input, &rv)
err := DateRangeUnmarshal(input, &rv)
if err != nil {
return nil, err
}
Expand Down
8 changes: 0 additions & 8 deletions search/query/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,6 @@ func TestParseQuery(t *testing.T) {
return q
}(),
},
{
input: []byte(`{"start":"` + startDateStr + `","end":"` + endDateStr + `","field":"desc"}`),
output: func() Query {
q := NewDateRangeQuery(startDate, endDate)
q.SetField("desc")
return q
}(),
},
{
input: []byte(`{"prefix":"budwei","field":"desc"}`),
output: func() Query {
Expand Down