diff --git a/analysis/datetime/timestamp/microseconds/microseconds.go b/analysis/datetime/timestamp/microseconds/microseconds.go new file mode 100644 index 000000000..3efed0edd --- /dev/null +++ b/analysis/datetime/timestamp/microseconds/microseconds.go @@ -0,0 +1,52 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package microseconds + +import ( + "math" + "strconv" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "unix_micro" + +type DateTimeParser struct { +} + +var minBound int64 = math.MinInt64 / 1000 +var maxBound int64 = math.MaxInt64 / 1000 + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + // input is a base-10 integer: microseconds since the UNIX epoch + timestamp, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return time.Time{}, "", analysis.ErrInvalidTimestampString + } + if timestamp < minBound || timestamp > maxBound { + return time.Time{}, "", analysis.ErrInvalidTimestampRange + } + return time.UnixMicro(timestamp), "", nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + return &DateTimeParser{}, nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/analysis/datetime/timestamp/milliseconds/milliseconds.go b/analysis/datetime/timestamp/milliseconds/milliseconds.go 
new file mode 100644 index 000000000..790153d20 --- /dev/null +++ b/analysis/datetime/timestamp/milliseconds/milliseconds.go @@ -0,0 +1,52 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package milliseconds + +import ( + "math" + "strconv" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "unix_milli" + +type DateTimeParser struct { +} + +var minBound int64 = math.MinInt64 / 1000000 +var maxBound int64 = math.MaxInt64 / 1000000 + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + // input is a base-10 integer: milliseconds since the UNIX epoch + timestamp, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return time.Time{}, "", analysis.ErrInvalidTimestampString + } + if timestamp < minBound || timestamp > maxBound { + return time.Time{}, "", analysis.ErrInvalidTimestampRange + } + return time.UnixMilli(timestamp), "", nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + return &DateTimeParser{}, nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/analysis/datetime/timestamp/nanoseconds/nanoseconds.go b/analysis/datetime/timestamp/nanoseconds/nanoseconds.go new file mode 100644 index 000000000..de318f3f1 --- /dev/null +++ b/analysis/datetime/timestamp/nanoseconds/nanoseconds.go @@ 
-0,0 +1,52 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nanoseconds + +import ( + "math" + "strconv" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "unix_nano" + +type DateTimeParser struct { +} + +var minBound int64 = math.MinInt64 +var maxBound int64 = math.MaxInt64 + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + // input is a base-10 integer: nanoseconds since the UNIX epoch + timestamp, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return time.Time{}, "", analysis.ErrInvalidTimestampString + } + if timestamp < minBound || timestamp > maxBound { + return time.Time{}, "", analysis.ErrInvalidTimestampRange + } + return time.Unix(0, timestamp), "", nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + return &DateTimeParser{}, nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/analysis/datetime/timestamp/seconds/seconds.go b/analysis/datetime/timestamp/seconds/seconds.go new file mode 100644 index 000000000..8d3ee3b4a --- /dev/null +++ b/analysis/datetime/timestamp/seconds/seconds.go @@ -0,0 +1,52 @@ +// Copyright (c) 2014 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package seconds + +import ( + "math" + "strconv" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "unix_sec" + +type DateTimeParser struct { +} + +var minBound int64 = math.MinInt64 / 1000000000 +var maxBound int64 = math.MaxInt64 / 1000000000 + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + // input is a base-10 integer: seconds since the UNIX epoch + timestamp, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return time.Time{}, "", analysis.ErrInvalidTimestampString + } + if timestamp < minBound || timestamp > maxBound { + return time.Time{}, "", analysis.ErrInvalidTimestampRange + } + return time.Unix(timestamp, 0), "", nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + return &DateTimeParser{}, nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/analysis/type.go b/analysis/type.go index 02a8d7e8b..e3a7f201b 100644 --- a/analysis/type.go +++ b/analysis/type.go @@ -99,6 +99,9 @@ func (a *DefaultAnalyzer) Analyze(input []byte) TokenStream { var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts") +var ErrInvalidTimestampString = fmt.Errorf("unable to parse timestamp string") +var ErrInvalidTimestampRange = fmt.Errorf("timestamp out of 
range") + type DateTimeParser interface { ParseDateTime(string) (time.Time, string, error) } diff --git a/config/config.go b/config/config.go index e30fe48ff..2f6df4f4d 100644 --- a/config/config.go +++ b/config/config.go @@ -72,6 +72,10 @@ import ( _ "github.com/blevesearch/bleve/v2/analysis/datetime/flexible" _ "github.com/blevesearch/bleve/v2/analysis/datetime/optional" _ "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized" + _ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds" + _ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds" + _ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds" + _ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds" // languages _ "github.com/blevesearch/bleve/v2/analysis/lang/ar" diff --git a/http/doc_get.go b/http/doc_get.go index 34d1e863d..328955eb5 100644 --- a/http/doc_get.go +++ b/http/doc_get.go @@ -17,6 +17,7 @@ package http import ( "fmt" "net/http" + "strconv" index "github.com/blevesearch/bleve_index_api" ) @@ -91,7 +92,7 @@ func (h *DocGetHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { if err == nil { if layout == "" { // layout not set probably means it was indexed as a timestamp - newval = d.UnixNano() + newval = strconv.FormatInt(d.UnixNano(), 10) } else { newval = d.Format(layout) } diff --git a/index_impl.go b/index_impl.go index 6c47872d2..4d82340d2 100644 --- a/index_impl.go +++ b/index_impl.go @@ -21,6 +21,7 @@ import ( "io" "os" "path/filepath" + "strconv" "sync" "sync/atomic" "time" @@ -678,7 +679,7 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, if err == nil { if layout == "" { // layout not set probably means it was indexed as a timestamp - value = datetime.UnixNano() + value = strconv.FormatInt(datetime.UnixNano(), 10) } else { value = datetime.Format(layout) } diff --git a/search_test.go b/search_test.go index 7f1ef0b81..5221e92f8 100644 --- a/search_test.go +++ 
b/search_test.go @@ -31,6 +31,10 @@ import ( regexp_char_filter "github.com/blevesearch/bleve/v2/analysis/char/regexp" "github.com/blevesearch/bleve/v2/analysis/datetime/flexible" "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized" + "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds" + "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds" + "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds" + "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds" "github.com/blevesearch/bleve/v2/analysis/token/length" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/shingle" @@ -2994,3 +2998,242 @@ func TestDateRangeFaceQueriesWithCustomDateTimeParser(t *testing.T) { } } } + +// TestDateRangeTimestampQueries indexes documents whose date fields use the +// unix_sec, unix_milli, unix_micro and unix_nano parsers and checks that date +// range queries return the expected doc IDs and stored field values. +func TestDateRangeTimestampQueries(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + imap := mapping.NewIndexMapping() + + // add a date field with a valid format to the default mapping + // for good measure + + dtParserConfig := map[string]interface{}{ + "type": flexible.Name, + "layouts": []interface{}{"2006/01/02 15:04:05"}, + } + err := imap.AddCustomDateTimeParser("custDT", dtParserConfig) + if err != nil { + t.Fatal(err) + } + + dateField := mapping.NewDateTimeFieldMapping() + dateField.DateFormat = "custDT" + + unixSecField := mapping.NewDateTimeFieldMapping() + unixSecField.DateFormat = seconds.Name + + unixMilliSecField := mapping.NewDateTimeFieldMapping() + unixMilliSecField.DateFormat = milliseconds.Name + + unixMicroSecField := mapping.NewDateTimeFieldMapping() + unixMicroSecField.DateFormat = microseconds.Name + + unixNanoSecField := mapping.NewDateTimeFieldMapping() + unixNanoSecField.DateFormat = nanoseconds.Name + + imap.DefaultMapping.AddFieldMappingsAt("date", dateField) + imap.DefaultMapping.AddFieldMappingsAt("seconds", unixSecField) 
+ imap.DefaultMapping.AddFieldMappingsAt("milliseconds", unixMilliSecField) + imap.DefaultMapping.AddFieldMappingsAt("microseconds", unixMicroSecField) + imap.DefaultMapping.AddFieldMappingsAt("nanoseconds", unixNanoSecField) + + idx, err := New(tmpIndexPath, imap) + if err != nil { + t.Fatal(err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + documents := map[string]map[string]interface{}{ + "doc1": { + "date": "2001/08/20 03:00:10", + "seconds": "998276410", + "milliseconds": "998276410100", + "microseconds": "998276410100300", + "nanoseconds": "998276410100300400", + }, + "doc2": { + "date": "2001/08/20 03:00:20", + "seconds": "998276420", + "milliseconds": "998276410200", + "microseconds": "998276410100400", + "nanoseconds": "998276410100300500", + }, + "doc3": { + "date": "2001/08/20 03:00:30", + "seconds": "998276430", + "milliseconds": "998276410300", + "microseconds": "998276410100500", + "nanoseconds": "998276410100300600", + }, + "doc4": { + "date": "2001/08/20 03:00:40", + "seconds": "998276440", + "milliseconds": "998276410400", + "microseconds": "998276410100600", + "nanoseconds": "998276410100300700", + }, + "doc5": { + "date": "2001/08/20 03:00:50", + "seconds": "998276450", + "milliseconds": "998276410500", + "microseconds": "998276410100700", + "nanoseconds": "998276410100300800", + }, + } + + batch := idx.NewBatch() + for docID, doc := range documents { + err := batch.Index(docID, doc) + if err != nil { + t.Fatal(err) + } + } + err = idx.Batch(batch) + if err != nil { + t.Fatal(err) + } + + type testResult struct { + docID string // doc ID of the hit + hitField string // fields returned as part of the hit + } + type testStruct struct { + start string + end string + field string + expectedHits []testResult + } + + testQueries := []testStruct{ + { + start: "2001-08-20T03:00:05", + end: "2001-08-20T03:00:25", + field: "date", + expectedHits: []testResult{ + { + docID: "doc1", + hitField: "2001/08/20 03:00:10", 
+ }, + { + docID: "doc2", + hitField: "2001/08/20 03:00:20", + }, + }, + }, + { + start: "2001-08-20T03:00:15", + end: "2001-08-20T03:00:35", + field: "seconds", + expectedHits: []testResult{ + { + docID: "doc2", + hitField: "998276420000000000", + }, + { + docID: "doc3", + hitField: "998276430000000000", + }, + }, + }, + { + start: "2001-08-20T03:00:10.150", + end: "2001-08-20T03:00:10.450", + field: "milliseconds", + expectedHits: []testResult{ + { + docID: "doc2", + hitField: "998276410200000000", + }, + { + docID: "doc3", + hitField: "998276410300000000", + }, + { + docID: "doc4", + hitField: "998276410400000000", + }, + }, + }, + { + start: "2001-08-20T03:00:10.100450", + end: "2001-08-20T03:00:10.100650", + field: "microseconds", + expectedHits: []testResult{ + { + docID: "doc3", + hitField: "998276410100500000", + }, + { + docID: "doc4", + hitField: "998276410100600000", + }, + }, + }, + { + start: "2001-08-20T03:00:10.100300550", + end: "2001-08-20T03:00:10.100300850", + field: "nanoseconds", + expectedHits: []testResult{ + { + docID: "doc3", + hitField: "998276410100300600", + }, + { + docID: "doc4", + hitField: "998276410100300700", + }, + { + docID: "doc5", + hitField: "998276410100300800", + }, + }, + }, + } + testLayout := "2006-01-02T15:04:05" + for _, dtq := range testQueries { + startTime, err := time.Parse(testLayout, dtq.start) + if err != nil { + t.Fatal(err) + } + endTime, err := time.Parse(testLayout, dtq.end) + if err != nil { + t.Fatal(err) + } + drq := NewDateRangeQuery(startTime, endTime) + drq.SetField(dtq.field) + + sr := NewSearchRequest(drq) + sr.SortBy([]string{dtq.field}) + sr.Fields = []string{dtq.field} + + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + if len(res.Hits) != len(dtq.expectedHits) { + t.Fatalf("expected %d hits, got %d", len(dtq.expectedHits), len(res.Hits)) + } + for i, hit := range res.Hits { + if hit.ID != dtq.expectedHits[i].docID { + t.Fatalf("expected docID %s, got %s", dtq.expectedHits[i].docID, 
hit.ID) + } + // fail with a message rather than panic if the stored value is not a string + got, ok := hit.Fields[dtq.field].(string) + if !ok { + t.Fatalf("expected field %s to be a string, got %T", dtq.field, hit.Fields[dtq.field]) + } + if got != dtq.expectedHits[i].hitField { + t.Fatalf("expected hit field %s, got %s", dtq.expectedHits[i].hitField, got) + } + } + } +}