Skip to content

Commit

Permalink
MB-58134: Skip parsing date time fields with timestamps (#1870)
Browse files Browse the repository at this point in the history
A user can set one of four values as the DateFormat for a
DatetimeFieldMapping
- "unix_micro"
- "unix_sec"
- "unix_milli"
- "unix_nano"

This indicates that the field has a UNIX epoch timestamp in
microseconds/seconds/milliseconds/nanoseconds.
Such fields should not be parsed with a date time parser since they
already contain the timestamp.
Users can still perform date range queries on these fields as usual.
  • Loading branch information
CascadingRadium authored Sep 8, 2023
1 parent 2267798 commit d1cd873
Show file tree
Hide file tree
Showing 9 changed files with 454 additions and 2 deletions.
52 changes: 52 additions & 0 deletions analysis/datetime/timestamp/microseconds/microseconds.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package microseconds

import (
"math"
"strconv"
"time"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)

// Name is the identifier under which this parser is registered; it is the
// DateFormat value that selects microsecond UNIX timestamps.
const Name = "unix_micro"

// DateTimeParser converts decimal strings holding microseconds since the UNIX
// epoch into time.Time values. It has no fields and therefore no state.
type DateTimeParser struct{}

// minBound and maxBound are the smallest and largest timestamps accepted by
// ParseDateTime. Every value in this interval can be multiplied by 1000
// without overflowing an int64 (presumably so the resulting time can be
// expressed as int64 nanoseconds -- confirm against callers).
var (
	minBound int64 = math.MinInt64 / 1_000
	maxBound int64 = math.MaxInt64 / 1_000
)

// ParseDateTime interprets input as a base-10 integer number of microseconds
// since the UNIX epoch and converts it to a time.Time.
//
// It returns analysis.ErrInvalidTimestampString when input is not a decimal
// integer that fits in an int64, and analysis.ErrInvalidTimestampRange when
// the value lies outside [minBound, maxBound]. The string result is always
// empty.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	// The timestamp is in microseconds (not milliseconds) since the UNIX epoch.
	usec, err := strconv.ParseInt(input, 10, 64)
	switch {
	case err != nil:
		return time.Time{}, "", analysis.ErrInvalidTimestampString
	case usec < minBound, usec > maxBound:
		return time.Time{}, "", analysis.ErrInvalidTimestampRange
	}
	return time.UnixMicro(usec), "", nil
}

// DateTimeParserConstructor returns a new microsecond timestamp parser.
// Neither config nor cache is consulted, and the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	parser := new(DateTimeParser)
	return parser, nil
}

// init registers DateTimeParserConstructor with the registry under Name
// ("unix_micro") when this package is imported.
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
52 changes: 52 additions & 0 deletions analysis/datetime/timestamp/milliseconds/milliseconds.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package milliseconds

import (
"math"
"strconv"
"time"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)

// Name is the identifier under which this parser is registered; it is the
// DateFormat value that selects millisecond UNIX timestamps.
const Name = "unix_milli"

// DateTimeParser converts decimal strings holding milliseconds since the UNIX
// epoch into time.Time values. It has no fields and therefore no state.
type DateTimeParser struct{}

// minBound and maxBound are the smallest and largest timestamps accepted by
// ParseDateTime. Every value in this interval can be multiplied by 1,000,000
// without overflowing an int64 (presumably so the resulting time can be
// expressed as int64 nanoseconds -- confirm against callers).
var (
	minBound int64 = math.MinInt64 / 1_000_000
	maxBound int64 = math.MaxInt64 / 1_000_000
)

// ParseDateTime interprets input as a base-10 integer number of milliseconds
// since the UNIX epoch and converts it to a time.Time.
//
// It returns analysis.ErrInvalidTimestampString when input is not a decimal
// integer that fits in an int64, and analysis.ErrInvalidTimestampRange when
// the value lies outside [minBound, maxBound]. The string result is always
// empty.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	msec, parseErr := strconv.ParseInt(input, 10, 64)
	switch {
	case parseErr != nil:
		return time.Time{}, "", analysis.ErrInvalidTimestampString
	case msec < minBound, msec > maxBound:
		return time.Time{}, "", analysis.ErrInvalidTimestampRange
	}
	return time.UnixMilli(msec), "", nil
}

// DateTimeParserConstructor returns a new millisecond timestamp parser.
// Neither config nor cache is consulted, and the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	parser := new(DateTimeParser)
	return parser, nil
}

// init registers DateTimeParserConstructor with the registry under Name
// ("unix_milli") when this package is imported.
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
52 changes: 52 additions & 0 deletions analysis/datetime/timestamp/nanoseconds/nanoseconds.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nanoseconds

import (
"math"
"strconv"
"time"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)

// Name is the identifier under which this parser is registered; it is the
// DateFormat value that selects nanosecond UNIX timestamps.
const Name = "unix_nano"

// DateTimeParser converts decimal strings holding nanoseconds since the UNIX
// epoch into time.Time values. It has no fields and therefore no state.
type DateTimeParser struct{}

// minBound and maxBound are the smallest and largest timestamps accepted by
// ParseDateTime. For nanoseconds they span the entire int64 range.
var (
	minBound int64 = math.MinInt64
	maxBound int64 = math.MaxInt64
)

// ParseDateTime interprets input as a base-10 integer number of nanoseconds
// since the UNIX epoch and converts it to a time.Time.
//
// It returns analysis.ErrInvalidTimestampString when input is not a decimal
// integer that fits in an int64, and analysis.ErrInvalidTimestampRange when
// the value lies outside [minBound, maxBound]. The string result is always
// empty.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	// The timestamp is in nanoseconds (not milliseconds) since the UNIX epoch.
	nsec, err := strconv.ParseInt(input, 10, 64)
	switch {
	case err != nil:
		return time.Time{}, "", analysis.ErrInvalidTimestampString
	case nsec < minBound, nsec > maxBound:
		// While the bounds cover the whole int64 range this case cannot
		// trigger; it is retained so the check takes effect if the bounds are
		// ever narrowed.
		return time.Time{}, "", analysis.ErrInvalidTimestampRange
	}
	return time.Unix(0, nsec), "", nil
}

// DateTimeParserConstructor returns a new nanosecond timestamp parser.
// Neither config nor cache is consulted, and the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	parser := new(DateTimeParser)
	return parser, nil
}

// init registers DateTimeParserConstructor with the registry under Name
// ("unix_nano") when this package is imported.
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
52 changes: 52 additions & 0 deletions analysis/datetime/timestamp/seconds/seconds.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package seconds

import (
"math"
"strconv"
"time"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)

// Name is the identifier under which this parser is registered; it is the
// DateFormat value that selects second-resolution UNIX timestamps.
const Name = "unix_sec"

// DateTimeParser converts decimal strings holding seconds since the UNIX
// epoch into time.Time values. It has no fields and therefore no state.
type DateTimeParser struct{}

// minBound and maxBound are the smallest and largest timestamps accepted by
// ParseDateTime. Every value in this interval can be multiplied by
// 1,000,000,000 without overflowing an int64 (presumably so the resulting
// time can be expressed as int64 nanoseconds -- confirm against callers).
var (
	minBound int64 = math.MinInt64 / 1_000_000_000
	maxBound int64 = math.MaxInt64 / 1_000_000_000
)

// ParseDateTime interprets input as a base-10 integer number of seconds since
// the UNIX epoch and converts it to a time.Time.
//
// It returns analysis.ErrInvalidTimestampString when input is not a decimal
// integer that fits in an int64, and analysis.ErrInvalidTimestampRange when
// the value lies outside [minBound, maxBound]. The string result is always
// empty.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	sec, parseErr := strconv.ParseInt(input, 10, 64)
	switch {
	case parseErr != nil:
		return time.Time{}, "", analysis.ErrInvalidTimestampString
	case sec < minBound, sec > maxBound:
		return time.Time{}, "", analysis.ErrInvalidTimestampRange
	}
	return time.Unix(sec, 0), "", nil
}

// DateTimeParserConstructor returns a new second-resolution timestamp parser.
// Neither config nor cache is consulted, and the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	parser := new(DateTimeParser)
	return parser, nil
}

// init registers DateTimeParserConstructor with the registry under Name
// ("unix_sec") when this package is imported.
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
3 changes: 3 additions & 0 deletions analysis/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ func (a *DefaultAnalyzer) Analyze(input []byte) TokenStream {

// ErrInvalidDateTime reports, per its message, that a datetime string could
// not be parsed with any of the available layouts.
var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")

// ErrInvalidTimestampString is returned by the UNIX timestamp parsers
// (unix_sec, unix_milli, unix_micro, unix_nano) when the input cannot be
// parsed as a base-10 int64.
var ErrInvalidTimestampString = fmt.Errorf("unable to parse timestamp string")
// ErrInvalidTimestampRange is returned by the UNIX timestamp parsers when the
// parsed integer falls outside the parser's accepted minimum/maximum bounds.
var ErrInvalidTimestampRange = fmt.Errorf("timestamp out of range")

// DateTimeParser is implemented by types that convert a string into a
// time.Time.
type DateTimeParser interface {
// ParseDateTime returns the parsed time, a string, and an error.
// NOTE(review): the string result appears to be the layout that matched
// (callers compare it against "") -- confirm against the implementations.
ParseDateTime(string) (time.Time, string, error)
}
Expand Down
4 changes: 4 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ import (
_ "github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/optional"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds"

// languages
_ "github.com/blevesearch/bleve/v2/analysis/lang/ar"
Expand Down
3 changes: 2 additions & 1 deletion http/doc_get.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package http
import (
"fmt"
"net/http"
"strconv"

index "github.com/blevesearch/bleve_index_api"
)
Expand Down Expand Up @@ -91,7 +92,7 @@ func (h *DocGetHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if err == nil {
if layout == "" {
// layout not set probably means it was indexed as a timestamp
newval = d.UnixNano()
newval = strconv.FormatInt(d.UnixNano(), 10)
} else {
newval = d.Format(layout)
}
Expand Down
3 changes: 2 additions & 1 deletion index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"io"
"os"
"path/filepath"
"strconv"
"sync"
"sync/atomic"
"time"
Expand Down Expand Up @@ -678,7 +679,7 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
if err == nil {
if layout == "" {
// layout not set probably means it was indexed as a timestamp
value = datetime.UnixNano()
value = strconv.FormatInt(datetime.UnixNano(), 10)
} else {
value = datetime.Format(layout)
}
Expand Down
Loading

0 comments on commit d1cd873

Please sign in to comment.