From 0d91fdff7b3ec12d8b986c624dd65bde13bb4fa5 Mon Sep 17 00:00:00 2001
From: jonathanpv
Date: Fri, 5 Apr 2024 17:52:58 -0500
Subject: [PATCH 1/6] Add binary embedding quantization support to FeatureExtraction pipeline

---
 src/pipelines.js    | 19 +++++++++++++++++++
 src/tokenizers.js   |  4 ++--
 src/utils/tensor.js | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/src/pipelines.js b/src/pipelines.js
index 2b064d522..c38f9f78d 100644
--- a/src/pipelines.js
+++ b/src/pipelines.js
@@ -67,6 +67,7 @@ import {
     Tensor,
     mean_pooling,
     interpolate,
+    quantize_embeddings,
 } from './utils/tensor.js';
 
 import { RawImage } from './utils/image.js';
@@ -1112,6 +1113,8 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
  * @typedef {Object} FeatureExtractionPipelineOptions Parameters specific to feature extraction pipelines.
  * @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
  * @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
+ * @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
+ * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
  *
  * @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
  * @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
@@ -1157,6 +1160,16 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
  * // dims: [1, 384]
  * // }
  * ```
+ * **Example:** Calculating binary embeddings with `sentence-transformers` models.
+ * ```javascript
+ * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
+ * const output = await extractor('This is a simple test.', { pooling: 'mean', normalize: true, quantize: true, precision: 'binary' });
+ * // Tensor {
+ * //   type: 'int8',
+ * //   data: Int8Array [-13, -78, 21, ...],
+ * //   dims: [1, 48]
+ * // }
+ * ```
  */
 export class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineConstructorArgs) => FeatureExtractionPipelineType} */ (Pipeline)) {
     /**
@@ -1171,6 +1184,8 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
     async _call(texts, {
         pooling = /** @type {'none'} */('none'),
         normalize = false,
+        quantize = false,
+        precision = /** @type {'binary'|'ubinary'} */('binary'),
     } = {}) {
 
         // Run tokenization
@@ -1203,6 +1218,10 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
             result = result.normalize(2, -1);
         }
 
+        if (quantize) {
+            result = quantize_embeddings(result, precision);
+        }
+
         return result;
     }
 }
diff --git a/src/tokenizers.js b/src/tokenizers.js
index 5b58e37c0..e671c8318 100644
--- a/src/tokenizers.js
+++ b/src/tokenizers.js
@@ -2653,8 +2653,8 @@ export class PreTrainedTokenizer extends Callable {
             }
 
         } else {
-            if (text === null) {
-                throw Error('text may not be null')
+            if (text === null || text === undefined) {
+                throw Error('text may not be null or undefined')
             }
 
             if (Array.isArray(text_pair)) {
diff --git a/src/utils/tensor.js b/src/utils/tensor.js
index ccdf781be..5e777daca 100644
--- a/src/utils/tensor.js
+++ b/src/utils/tensor.js
@@ -1193,3 +1193,46 @@ export function ones(size) {
 export function ones_like(tensor) {
     return ones(tensor.dims);
 }
+
+/**
+ * Quantizes the embeddings tensor to binary or unsigned binary precision.
+ * @param {Tensor} tensor The tensor to quantize.
+ * @param {'binary'|'ubinary'} precision The precision to use for quantization.
+ * @returns The quantized tensor.
+ */
+export function quantize_embeddings(tensor, precision) {
+    if (tensor.dims.length !== 2) {
+        throw new Error("The tensor must have 2 dimensions");
+    }
+    if (tensor.dims.at(-1) % 8 !== 0) {
+        throw new Error("The last dimension of the tensor must be a multiple of 8");
+    }
+    if (!['binary', 'ubinary'].includes(precision)) {
+        throw new Error("The precision must be either 'binary' or 'ubinary'");
+    }
+    // Create a typed array to store the packed bits
+    const inputData = tensor.data;
+
+    const signed = precision === 'binary';
+    const cls = signed ? Int8Array : Uint8Array;
+    const dtype = signed ? 'int8' : 'uint8';
+    const outputData = new cls(inputData.length / 8);
+
+    // Iterate over each number in the array
+    for (let i = 0; i < inputData.length; ++i) {
+        // Determine if the number is greater than 0
+        const bit = inputData[i] > 0 ? 1 : 0;
+
+        // Calculate the index in the typed array and the position within the byte
+        const arrayIndex = Math.floor(i / 8);
+        const bitPosition = i % 8;
+
+        // Pack the bit into the typed array
+        outputData[arrayIndex] |= bit << (7 - bitPosition);
+        if (signed && bitPosition === 0) {
+            outputData[arrayIndex] -= 128;
+        }
+    };
+
+    return new Tensor(dtype, outputData, [tensor.dims[0], tensor.dims[1] / 8]);
+}

From c408b17f5f0bd71b65d70c33d3d8a488296170a7 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Wed, 10 Apr 2024 12:47:52 +0200
Subject: [PATCH 2/6] Add JSDoc return type

---
 src/utils/tensor.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/tensor.js b/src/utils/tensor.js
index 5e777daca..9e66914e0 100644
--- a/src/utils/tensor.js
+++ b/src/utils/tensor.js
@@ -1198,7 +1198,7 @@ export function ones_like(tensor) {
  * Quantizes the embeddings tensor to binary or unsigned binary precision.
  * @param {Tensor} tensor The tensor to quantize.
  * @param {'binary'|'ubinary'} precision The precision to use for quantization.
- * @returns The quantized tensor.
+ * @returns {Tensor} The quantized tensor.
  */
 export function quantize_embeddings(tensor, precision) {
     if (tensor.dims.length !== 2) {

From 183f2eeccad645a1d3f2d68c39b8265aa1fe13ea Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Wed, 10 Apr 2024 13:17:42 +0200
Subject: [PATCH 3/6] Apply suggestions from code review

---
 src/utils/tensor.js | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/utils/tensor.js b/src/utils/tensor.js
index 9e66914e0..87461f45c 100644
--- a/src/utils/tensor.js
+++ b/src/utils/tensor.js
@@ -1210,12 +1210,13 @@ export function quantize_embeddings(tensor, precision) {
     if (!['binary', 'ubinary'].includes(precision)) {
         throw new Error("The precision must be either 'binary' or 'ubinary'");
     }
-    // Create a typed array to store the packed bits
     const inputData = tensor.data;
 
     const signed = precision === 'binary';
-    const cls = signed ? Int8Array : Uint8Array;
     const dtype = signed ? 'int8' : 'uint8';
+    const cls = signed ? Int8Array : Uint8Array;
+
+    // Create a typed array to store the packed bits
     const outputData = new cls(inputData.length / 8);
 
     // Iterate over each number in the array

From 9c5934b6207a8d8593fc6b02c7fb366671ca92f0 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Wed, 10 Apr 2024 13:23:15 +0200
Subject: [PATCH 4/6] Update src/utils/tensor.js

---
 src/utils/tensor.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/utils/tensor.js b/src/utils/tensor.js
index 87461f45c..469054cac 100644
--- a/src/utils/tensor.js
+++ b/src/utils/tensor.js
@@ -1210,13 +1210,13 @@ export function quantize_embeddings(tensor, precision) {
     if (!['binary', 'ubinary'].includes(precision)) {
         throw new Error("The precision must be either 'binary' or 'ubinary'");
     }
-    const inputData = tensor.data;
 
     const signed = precision === 'binary';
     const dtype = signed ? 'int8' : 'uint8';
-    const cls = signed ? Int8Array : Uint8Array;
 
     // Create a typed array to store the packed bits
+    const cls = signed ? Int8Array : Uint8Array;
+    const inputData = tensor.data;
     const outputData = new cls(inputData.length / 8);
 
     // Iterate over each number in the array

From bc44f13cba3e90ce057bd6361ec7e871c2b3fa6e Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Wed, 10 Apr 2024 15:07:30 +0200
Subject: [PATCH 5/6] Update example code snippet

---
 src/pipelines.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pipelines.js b/src/pipelines.js
index c38f9f78d..d1c7e8220 100644
--- a/src/pipelines.js
+++ b/src/pipelines.js
@@ -1163,10 +1163,10 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
  * **Example:** Calculating binary embeddings with `sentence-transformers` models.
  * ```javascript
  * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
- * const output = await extractor('This is a simple test.', { pooling: 'mean', normalize: true, quantize: true, precision: 'binary' });
+ * const output = await extractor('This is a simple test.', { pooling: 'mean', quantize: true, precision: 'binary' });
  * // Tensor {
  * //   type: 'int8',
- * //   data: Int8Array [-13, -78, 21, ...],
+ * //   data: Int8Array [49, 108, 24, ...],
  * //   dims: [1, 48]
  * // }
  * ```

From cc07ec893007140527bc61aade1050ea916c7e09 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Wed, 10 Apr 2024 15:11:51 +0200
Subject: [PATCH 6/6] Update src/pipelines.js

---
 src/pipelines.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pipelines.js b/src/pipelines.js
index d1c7e8220..3b15af53d 100644
--- a/src/pipelines.js
+++ b/src/pipelines.js
@@ -1185,7 +1185,7 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
         pooling = /** @type {'none'} */('none'),
         normalize = false,
         quantize = false,
-        precision = /** @type {'binary'|'ubinary'} */('binary'),
+        precision = /** @type {'binary'} */('binary'),
     } = {}) {
 
         // Run tokenization
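
A note on the packing arithmetic added in PATCH 1/6 (and only reorganized by PATCHES 3 and 4): each group of 8 embedding values is thresholded at zero and packed into one byte, so a 384-dimensional embedding becomes 48 bytes (the `dims: [1, 48]` shown in the examples above), and for `'binary'` precision each byte is additionally shifted by 128 into the signed `int8` range, which is what the `outputData[arrayIndex] -= 128` branch accomplishes. The following standalone sketch is not part of the patch series; the `packRow` helper and the sample values are hypothetical, introduced only to illustrate the same packing on a plain array.

```javascript
// Minimal sketch of the bit-packing used by quantize_embeddings.
// Bit i of each output byte is 1 when the corresponding value is > 0.
// For signed ('binary') output, 128 is subtracted once per byte, mapping
// the unsigned range [0, 255] onto the int8 range [-128, 127].
function packRow(values, signed = true) {
    if (values.length % 8 !== 0) {
        throw new Error('Length must be a multiple of 8');
    }
    const out = new (signed ? Int8Array : Uint8Array)(values.length / 8);
    for (let i = 0; i < values.length; ++i) {
        const bit = values[i] > 0 ? 1 : 0;
        const byteIndex = Math.floor(i / 8);
        const bitPosition = i % 8;
        out[byteIndex] |= bit << (7 - bitPosition);
        if (signed && bitPosition === 0) {
            out[byteIndex] -= 128;
        }
    }
    return out;
}

// A hypothetical 8-dimensional "embedding": signs 1 0 1 1 0 0 1 0 -> 0b10110010 = 178.
const row = [0.3, -0.1, 0.8, 0.5, -0.4, 0.0, 0.2, -0.9];
console.log(packRow(row, false)); // Uint8Array [178]  ('ubinary')
console.log(packRow(row, true));  // Int8Array [50]    ('binary': 178 - 128 = 50)
```

In both precisions the last dimension shrinks by a factor of 8; only the dtype and the 128 offset differ between `'binary'` (`int8`) and `'ubinary'` (`uint8`).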