Skip to content

Commit

Permalink
MB-63334: Use faiss's advertized method to normalize vector
Browse files Browse the repository at this point in the history
Uses new version of blevesearch/go-faiss that brings in:
* f19c1d4 Abhi Dangeti | MB-63334: Method to normalize a single vector
  • Loading branch information
abhinavdangeti committed Sep 18, 2024
1 parent 999fe60 commit eeabd6d
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 20 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/bits-and-blooms/bitset v1.12.0
github.com/blevesearch/bleve_index_api v1.1.12
github.com/blevesearch/geo v0.1.20
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/goleveldb v1.0.1
Expand All @@ -32,7 +33,6 @@ require (
)

require (
github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/couchbase/ghistogram v0.1.0 // indirect
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+
github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4 h1:riy8XP3UIBeVjMhsq1r1aGfjvTf3aPp2PuXxdiw9P4s=
github.com/blevesearch/go-faiss v1.0.22-0.20240909180832-35a1ff78ead4/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92 h1:pDbDTN8dgycpdp9eCzrNp9e6Z4C+UQhCUAZbaarQ6Bs=
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
Expand Down
20 changes: 3 additions & 17 deletions mapping/mapping_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ package mapping

import (
"fmt"
"math"
"reflect"

faiss "github.com/blevesearch/go-faiss"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
Expand Down Expand Up @@ -262,20 +262,6 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string,
return nil
}

func NormalizeVector(vector []float32) []float32 {
// first calculate the magnitude of the vector
var mag float64
for _, v := range vector {
mag += float64(v) * float64(v)
}
// cannot normalize a zero vector
// if the magnitude is 1, then the vector is already normalized
if mag != 0 && mag != 1 {
mag = math.Sqrt(mag)
// normalize the vector
for i, v := range vector {
vector[i] = float32(float64(v) / mag)
}
}
return vector
// NormalizeVector hands vec to faiss.NormalizeVector and returns that call's
// result unchanged.
//
// NOTE(review): whether vec is modified in place or a new slice is returned is
// decided by the go-faiss implementation, which is not visible from this file;
// confirm against go-faiss before relying on either behavior.
func NormalizeVector(vec []float32) []float32 {
return faiss.NormalizeVector(vec)
}
26 changes: 26 additions & 0 deletions mapping/mapping_vectors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package mapping

import (
"reflect"
"testing"
)

Expand Down Expand Up @@ -306,3 +307,28 @@ func TestProcessVector(t *testing.T) {
}
}
}

// TestNormalizeVector feeds a fixed set of vectors through NormalizeVector and
// requires each result to match its expected value exactly.
func TestNormalizeVector(t *testing.T) {
	// Each case pairs an input vector with the output expected for it.
	cases := []struct {
		input []float32
		want  []float32
	}{
		{
			input: []float32{1, 2, 3, 4, 5},
			want:  []float32{0.13483998, 0.26967996, 0.40451995, 0.5393599, 0.67419994},
		},
		{
			// Expected output is identical to the input.
			input: []float32{1, 0, 0, 0, 0},
			want:  []float32{1, 0, 0, 0, 0},
		},
		{
			input: []float32{0.182574183, 0.365148365, 0.547722578, 0.730296731},
			want:  []float32{0.18257418, 0.36514837, 0.5477226, 0.73029673},
		},
		{
			input: []float32{1, 1, 1, 1, 1, 1, 1, 1},
			want: []float32{
				0.35355338, 0.35355338, 0.35355338, 0.35355338,
				0.35355338, 0.35355338, 0.35355338, 0.35355338,
			},
		},
		{
			// A single zero component is expected to come back as zero.
			input: []float32{0},
			want:  []float32{0},
		},
	}

	for idx, tc := range cases {
		got := NormalizeVector(tc.input)
		if reflect.DeepEqual(got, tc.want) {
			continue
		}
		// Failures are reported with a 1-based case number.
		t.Errorf("[vector-%d] Expected: %v, Got: %v", idx+1, tc.want, got)
	}
}

0 comments on commit eeabd6d

Please sign in to comment.