Skip to content

Commit

Permalink
Support reading data from blob URI (#645)
Browse files Browse the repository at this point in the history
* Treat blob URIs as valid URLs

* Create function to detect the blob URI

* Change to `isValidUrl`

* Remove comment

Co-authored-by: Joshua Lochner <[email protected]>

* Merge `isValidHttpUrl` into `isValidUrl`

* Correct implementation

* Update docs

* Add test

* Remove export for `isValidUrl`

* Test read blob via `getFile`

* Use `res.text()` instead of `res.body`

---------

Co-authored-by: Joshua Lochner <[email protected]>
  • Loading branch information
hans00 and xenova authored May 8, 2024
1 parent 8bb8c5a commit 880cd3e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 9 deletions.
21 changes: 12 additions & 9 deletions src/utils/hub.js
Original file line number Diff line number Diff line change
Expand Up @@ -151,23 +151,26 @@ class FileResponse {
}

/**
* Determines whether the given string is a valid HTTP or HTTPS URL.
* @param {string|URL} string The string to test for validity as an HTTP or HTTPS URL.
* Determines whether the given string is a valid URL.
* @param {string|URL} string The string to test for validity as a URL.
* @param {string[]} [protocols=null] A list of valid protocols. If specified, the protocol must be in this list.
* @param {string[]} [validHosts=null] A list of valid hostnames. If specified, the URL's hostname must be in this list.
* @returns {boolean} True if the string is a valid HTTP or HTTPS URL, false otherwise.
* @returns {boolean} True if the string is a valid URL, false otherwise.
*/
function isValidHttpUrl(string, validHosts = null) {
// https://stackoverflow.com/a/43467144
/**
 * Determines whether the given string is a valid URL.
 *
 * The input is parsed with the `URL` constructor; anything that constructor rejects is not a valid URL.
 * @param {string|URL} string The string to test for validity as a URL.
 * @param {string[]} [protocols=null] A list of valid protocols, written with the trailing colon (e.g. `'https:'`). If specified, the URL's protocol must be in this list.
 * @param {string[]} [validHosts=null] A list of valid hostnames. If specified, the URL's hostname must be in this list.
 * @returns {boolean} True if the string parses as a URL and passes every supplied allowlist, false otherwise.
 */
function isValidUrl(string, protocols = null, validHosts = null) {
    let url;
    try {
        url = new URL(string);
    } catch (_) {
        // The URL constructor throws on input it cannot parse.
        return false;
    }
    if (protocols && !protocols.includes(url.protocol)) {
        return false;
    }
    if (validHosts && !validHosts.includes(url.hostname)) {
        return false;
    }
    // No hard-coded http/https restriction here: the `protocols` allowlist alone
    // decides which schemes are accepted, so callers can opt in to e.g. `blob:`.
    return true;
}

/**
Expand All @@ -178,7 +181,7 @@ function isValidHttpUrl(string, validHosts = null) {
*/
export async function getFile(urlOrPath) {

if (env.useFS && !isValidHttpUrl(urlOrPath)) {
if (env.useFS && !isValidUrl(urlOrPath, ['http:', 'https:', 'blob:'])) {
return new FileResponse(urlOrPath);

} else if (typeof process !== 'undefined' && process?.release?.name === 'node') {
Expand All @@ -189,7 +192,7 @@ export async function getFile(urlOrPath) {
headers.set('User-Agent', `transformers.js/${version}; is_ci/${IS_CI};`);

// Check whether we are making a request to the Hugging Face Hub.
const isHFURL = isValidHttpUrl(urlOrPath, ['huggingface.co', 'hf.co']);
const isHFURL = isValidUrl(urlOrPath, ['http:', 'https:'], ['huggingface.co', 'hf.co']);
if (isHFURL) {
// If an access token is present in the environment variables,
// we add it to the request headers.
Expand Down Expand Up @@ -433,7 +436,7 @@ export async function getModelFile(path_or_repo_id, filename, fatal = true, opti
if (env.allowLocalModels) {
// Accessing local models is enabled, so we try to get the file locally.
// If request is a valid HTTP URL, we skip the local file check. Otherwise, we try to get the file locally.
const isURL = isValidHttpUrl(requestURL);
const isURL = isValidUrl(requestURL, ['http:', 'https:']);
if (!isURL) {
try {
response = await getFile(localPath);
Expand Down
12 changes: 12 additions & 0 deletions tests/utils.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

import { AutoProcessor } from '../src/transformers.js';
import { mel_filter_bank } from '../src/utils/audio.js';
import { getFile } from '../src/utils/hub.js';

import { MAX_TEST_EXECUTION_TIME } from './init.js';

Expand Down Expand Up @@ -42,4 +43,15 @@ describe('Utilities', () => {
}, MAX_TEST_EXECUTION_TIME);

});

describe('Hub utilities', () => {

    // `getFile` is expected to load the contents of an in-memory blob through its object URL.
    it('Read data from blob', async () => {
        const blob = new Blob(['Hello, world!'], { type: 'text/plain' });
        const blobUrl = URL.createObjectURL(blob);
        try {
            const data = await getFile(blobUrl);
            expect(await data.text()).toBe('Hello, world!');
        } finally {
            // Release the object URL even when the assertion fails, so the blob
            // does not stay registered for the rest of the test run.
            URL.revokeObjectURL(blobUrl);
        }
    });

});
});

0 comments on commit 880cd3e

Please sign in to comment.