Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MB-63334: Use faiss's advertized method to normalize vector #2075

Merged
merged 1 commit into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/bits-and-blooms/bitset v1.12.0
github.com/blevesearch/bleve_index_api v1.1.12
github.com/blevesearch/geo v0.1.20
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/goleveldb v1.0.1
Expand All @@ -32,7 +33,6 @@ require (
)

require (
github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/couchbase/ghistogram v0.1.0 // indirect
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+
github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4 h1:riy8XP3UIBeVjMhsq1r1aGfjvTf3aPp2PuXxdiw9P4s=
github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92 h1:pDbDTN8dgycpdp9eCzrNp9e6Z4C+UQhCUAZbaarQ6Bs=
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
Expand Down
20 changes: 3 additions & 17 deletions mapping/mapping_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ package mapping

import (
"fmt"
"math"
"reflect"

faiss "github.com/blevesearch/go-faiss"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
Expand Down Expand Up @@ -262,20 +262,6 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string,
return nil
}

func NormalizeVector(vector []float32) []float32 {
// first calculate the magnitude of the vector
var mag float64
for _, v := range vector {
mag += float64(v) * float64(v)
}
// cannot normalize a zero vector
// if the magnitude is 1, then the vector is already normalized
if mag != 0 && mag != 1 {
mag = math.Sqrt(mag)
// normalize the vector
for i, v := range vector {
vector[i] = float32(float64(v) / mag)
}
}
return vector
func NormalizeVector(vec []float32) []float32 {
return faiss.NormalizeVector(vec)
}
26 changes: 26 additions & 0 deletions mapping/mapping_vectors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package mapping

import (
"reflect"
"testing"
)

Expand Down Expand Up @@ -306,3 +307,28 @@ func TestProcessVector(t *testing.T) {
}
}
}

func TestNormalizeVector(t *testing.T) {
vectors := [][]float32{
[]float32{1,2,3,4,5},
[]float32{1,0,0,0,0},
[]float32{0.182574183,0.365148365,0.547722578,0.730296731},
[]float32{1,1,1,1,1,1,1,1},
[]float32{0},
}

expectedNormalizedVectors := [][]float32{
[]float32{0.13483998,0.26967996,0.40451995,0.5393599,0.67419994},
[]float32{1,0,0,0,0},
[]float32{0.18257418,0.36514837,0.5477226,0.73029673},
[]float32{0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338},
[]float32{0},
}

for i := 0; i < len(vectors); i++ {
normalizedVector := NormalizeVector(vectors[i])
if !reflect.DeepEqual(normalizedVector, expectedNormalizedVectors[i]) {
t.Errorf("[vector-%d] Expected: %v, Got: %v", i+1, expectedNormalizedVectors[i], normalizedVector)
}
}
}
Loading