Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MB-58134: Skip parsing date time fields with timestamps #1870

Merged
merged 13 commits into from
Sep 8, 2023
51 changes: 51 additions & 0 deletions analysis/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package analysis

import (
"fmt"
"math"
"time"
)

Expand Down Expand Up @@ -99,6 +100,56 @@ func (a *DefaultAnalyzer) Analyze(input []byte) TokenStream {

var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")

// Names of the date formats that interpret an integer value as a UNIX
// timestamp. The suffix of each name is the unit in which the value is
// expressed.
const (
	UnixSecs      = "unix_sec"   // seconds
	UnixMilliSecs = "unix_milli" // milliseconds
	UnixMicroSecs = "unix_micro" // microseconds
	UnixNanoSecs  = "unix_nano"  // nanoseconds
)

// TimestampBounds is the inclusive range [Min, Max] of raw timestamp values
// accepted for one UNIX timestamp format.
type TimestampBounds struct {
	Min int64
	Max int64
}

// UnixTimestampFormats maps each UNIX timestamp format name to the range of
// raw values accepted for it. Every range is the int64 range divided
// (truncating toward zero) by the factor convertTimestamp multiplies by for
// that format, so any value inside the range, including Min and Max
// themselves, converts to nanoseconds without overflowing int64.
var UnixTimestampFormats = map[string]*TimestampBounds{
	UnixSecs: {
		Min: math.MinInt64 / 1000000000,
		Max: math.MaxInt64 / 1000000000,
	},
	UnixMilliSecs: {
		Min: math.MinInt64 / 1000000,
		Max: math.MaxInt64 / 1000000,
	},
	UnixMicroSecs: {
		Min: math.MinInt64 / 1000,
		Max: math.MaxInt64 / 1000,
	},
	UnixNanoSecs: {
		Min: math.MinInt64,
		Max: math.MaxInt64,
	},
}

// convertTimestamp scales timestamp from the unit named by format to
// nanoseconds. Any format other than UnixSecs, UnixMilliSecs or
// UnixMicroSecs is returned unchanged. No overflow check is performed here;
// callers are expected to have range-checked the value first.
func convertTimestamp(timestamp int64, format string) int64 {
	switch format {
	case UnixSecs:
		return timestamp * 1000000000
	case UnixMilliSecs:
		return timestamp * 1000000
	case UnixMicroSecs:
		return timestamp * 1000
	default:
		return timestamp
	}
}

// ValidateAndConvertTimestamp validates the timestamp against the bounds and
// converts it to nanoseconds if valid.
//
// The bounds are inclusive: bounds.Min and bounds.Max are themselves
// accepted. bounds should be the entry of UnixTimestampFormats that belongs
// to format; with mismatched bounds the conversion may overflow. A nil bounds
// or an out-of-range timestamp yields a zero result and a non-nil error.
func ValidateAndConvertTimestamp(timestamp int64, bounds *TimestampBounds, format string) (int64, error) {
	if bounds == nil {
		// Report a missing bounds entry instead of dereferencing nil.
		return 0, fmt.Errorf("no timestamp bounds for format %q", format)
	}
	if timestamp < bounds.Min || timestamp > bounds.Max {
		return 0, fmt.Errorf("timestamp out of range")
	}
	return convertTimestamp(timestamp, format), nil
}

// DateTimeParser is implemented by types that can turn the textual form of a
// date/time into a time.Time.
type DateTimeParser interface {
	// ParseDateTime parses input and returns the corresponding time, or an
	// error when input cannot be parsed.
	ParseDateTime(input string) (time.Time, error)
}
Expand Down
13 changes: 13 additions & 0 deletions document/field_datetime.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,19 @@ func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, d
return nil, fmt.Errorf("cannot represent %s in this type", dt)
}

// NewDateTimeFieldWithTimestamp builds a DateTimeField directly from an
// integer timestamp, without going through a time.Time value. The timestamp
// is prefix-coded with shift 0 and stored as the field value. The returned
// error is always nil.
//
// NOTE(review): the visible callers pass a nanosecond value produced by
// analysis.ValidateAndConvertTimestamp; confirm that every caller does so.
func NewDateTimeFieldWithTimestamp(name string, arrayPositions []uint64, timestamp int64, options index.FieldIndexingOptions) (*DateTimeField, error) {
	encoded := numeric.MustNewPrefixCodedInt64(timestamp, 0)

	field := &DateTimeField{
		name:           name,
		arrayPositions: arrayPositions,
		options:        options,
	}
	field.value = encoded
	// not correct, just a place holder until we revisit how fields are
	// represented and can fix this better
	field.numPlainTextBytes = uint64(8)

	return field, nil
}

func canRepresent(dt time.Time) bool {
if dt.Before(MinTimeRepresentable) || dt.After(MaxTimeRepresentable) {
return false
Expand Down
26 changes: 24 additions & 2 deletions mapping/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"reflect"
"time"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)

Expand Down Expand Up @@ -73,7 +74,10 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
if field.DateFormat != "" {
_, err = cache.DateTimeParserNamed(field.DateFormat)
if err != nil {
return err
_, unixFormat := analysis.UnixTimestampFormats[field.DateFormat]
if !unixFormat {
return err
}
}
}
switch field.Type {
Expand Down Expand Up @@ -436,6 +440,22 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
}
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
if subDocMapping != nil {
// index by explicit mapping in case of datetime
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "datetime" {
// This number is a UNIX timestamp
// hence must parse as a datetime
bounds, isUnixFormat := analysis.UnixTimestampFormats[fieldMapping.DateFormat]
if isUnixFormat {
CascadingRadium marked this conversation as resolved.
Show resolved Hide resolved
timestamp, err := analysis.ValidateAndConvertTimestamp(propertyValue.Int(), bounds, fieldMapping.DateFormat)
if err == nil {
fieldMapping.processTimestamp(timestamp, pathString, path, indexes, context)
}
}
}
}
}
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
return
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
Expand All @@ -446,7 +466,9 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
if fieldMapping.Type != "datetime" {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
Expand Down
39 changes: 35 additions & 4 deletions mapping/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"encoding/json"
"fmt"
"net"
"strconv"
"time"

"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
Expand Down Expand Up @@ -231,11 +232,24 @@ func (fm *FieldMapping) processString(propertyValueString string, pathString str
if fm.DateFormat != "" {
dateTimeFormat = fm.DateFormat
}
dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
bounds, isUnixFormat := analysis.UnixTimestampFormats[dateTimeFormat]
if !isUnixFormat {
dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err == nil {
fm.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
} else {
// special case for unix timestamp
// we need to convert the string to a time object
timestamp, err := strconv.ParseInt(propertyValueString, 10, 64)
if err == nil {
fm.processTime(parsedDateTime, pathString, path, indexes, context)
timestamp, err = analysis.ValidateAndConvertTimestamp(timestamp, bounds, dateTimeFormat)
if err == nil {
fm.processTimestamp(timestamp, pathString, path, indexes, context)
}
}
}
} else if fm.Type == "IP" {
Expand All @@ -259,6 +273,23 @@ func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString stri
}
}

func (fm *FieldMapping) processTimestamp(unixTimestamp int64, pathString string, path []string, indexes []uint64, context *walkContext) {
fieldName := getFieldName(pathString, path, fm)
if fm.Type == "datetime" {
options := fm.Options()
field, err := document.NewDateTimeFieldWithTimestamp(fieldName, indexes, unixTimestamp, options)
if err == nil {
context.doc.AddField(field)
} else {
logger.Printf("could not build date %v", err)
CascadingRadium marked this conversation as resolved.
Show resolved Hide resolved
}

if !fm.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
}
}

func (fm *FieldMapping) processTime(propertyValueTime time.Time, pathString string, path []string, indexes []uint64, context *walkContext) {
fieldName := getFieldName(pathString, path, fm)
if fm.Type == "datetime" {
Expand Down
1 change: 1 addition & 0 deletions mapping/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package mapping
import (
"encoding/json"
"fmt"

index "github.com/blevesearch/bleve_index_api"

CascadingRadium marked this conversation as resolved.
Show resolved Hide resolved
"github.com/blevesearch/bleve/v2/analysis"
Expand Down
4 changes: 2 additions & 2 deletions search/query/date_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ type DateRangeQuery struct {
// NewDateRangeQuery creates a new Query for ranges
// of date values.
// Date strings are parsed using the DateTimeParser configured in the
// top-level config.QueryDateTimeParser
// top-level config.QueryDateTimeParser
// Either, but not both endpoints can be nil.
func NewDateRangeQuery(start, end time.Time) *DateRangeQuery {
return NewDateRangeInclusiveQuery(start, end, nil, nil)
Expand All @@ -105,7 +105,7 @@ func NewDateRangeQuery(start, end time.Time) *DateRangeQuery {
// NewDateRangeInclusiveQuery creates a new Query for ranges
// of date values.
// Date strings are parsed using the DateTimeParser configured in the
// top-level config.QueryDateTimeParser
// top-level config.QueryDateTimeParser
// Either, but not both endpoints can be nil.
// startInclusive and endInclusive control inclusion of the endpoints.
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *DateRangeQuery {
Expand Down
Loading