Skip to content

Commit

Permalink
Fix download script (#317)
Browse files: browse the repository at this point in the history
* bundle tiny.en as whisper default model

* improve download-whisper-model script

* improve download-ffmpeg-wasm script
  • Loading branch information
an-lee authored Feb 18, 2024
1 parent da09134 commit fdc3c80
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 42 deletions.
32 changes: 23 additions & 9 deletions enjoy/scripts/download-ffmpeg-wasm.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ await Promise.all(
console.info(chalk.green(`✅ File ${file.name} valid`));
} else {
console.warn(
chalk.yellow(
`❌ File ${file.name} not valid, start to redownload`
)
chalk.yellow(`❌ File ${file.name} not valid, start to redownload`)
);
fs.removeSync(path.join(dir, file.name));
pendingFiles.push(file);
Expand Down Expand Up @@ -81,6 +79,8 @@ if (proxyUrl) {
}

const download = async (url, dest, md5) => {
console.info(chalk.blue(`=> Start to download ${url} to ${dest}`));

return spinner(async () => {
console.info(chalk.blue(`=> Start to download file ${url}`));
await axios
Expand All @@ -89,22 +89,27 @@ const download = async (url, dest, md5) => {
})
.then(async (response) => {
const data = Buffer.from(response.data, "binary");
console.info(chalk.green(`✅ ${dest} downloaded successfully`));

fs.writeFileSync(dest, data);
const hash = await hashFile(dest, { algo: "md5" });
if (hash === md5) {
console.info(chalk.green(`✅ ${dest} downloaded successfully`));
console.info(chalk.green(`✅ ${dest} valid`));
} else {
console.error(
chalk.red(
`❌ Error: ${dest} MD5 not match, ${hash} should be ${md5}`
`❌ Error: ${dest} not valid. \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
process.exit(1);
}
})
.catch((err) => {
console.error(chalk.red(`❌ Error: ${err}`));
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
process.exit(1);
});
});
Expand All @@ -126,20 +131,29 @@ const cleanup = () => {
try {
fs.removeSync(path.join(dir, file.name));
} catch (err) {
console.error(chalk.red(`❌ Error: ${err}`));
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
}
});
};

const baseURL = "https://unpkg.com/@ffmpeg/[email protected]/dist/esm";
// const baseURL = "https://unpkg.com/@ffmpeg/[email protected]/dist/esm";
const baseURL = "https://enjoy-storage.baizhiheizi.com";
try {
await Promise.all(
pendingFiles.map((file) =>
download(`${baseURL}/${file.name}`, path.join(dir, file.name), file.md5)
)
);
} catch (err) {
console.error(chalk.red(`❌ Error: ${err}`));
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
cleanup();
process.exit(1);
}
Expand Down
37 changes: 27 additions & 10 deletions enjoy/scripts/download-whisper-model.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import axios from "axios";
import progress from "progress";
import { createHash } from "crypto";

const model = "ggml-base.en-q5_1.bin";
const md5 = "55309cc6613788f07ac7988985210734";
const model = "ggml-tiny.en.bin";
const sha = "c78c86eb1a8faa21b369bcd33207cc90d64ae9df";

const dir = path.join(process.cwd(), "lib/whisper.cpp/models");

Expand All @@ -15,8 +15,8 @@ fs.ensureDirSync(dir);
try {
if (fs.statSync(path.join(dir, model)).isFile()) {
console.info(chalk.green(`✅ Model ${model} already exists`));
const hash = await hashFile(path.join(dir, model), { algo: "md5" });
if (hash === md5) {
const hash = await hashFile(path.join(dir, model), { algo: "sha1" });
if (hash === sha) {
console.info(chalk.green(`✅ Model ${model} valid`));
process.exit(0);
} else {
Expand Down Expand Up @@ -50,11 +50,12 @@ if (proxyUrl) {
};
}

const modelUrlPrefix =
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
// const modelUrlPrefix =
// "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
const modelUrlPrefix = "https://enjoy-storage.baizhiheizi.com";

function hashFile(path, options) {
const algo = options.algo || "md5";
const algo = options.algo || "sha1";
return new Promise((resolve, reject) => {
const hash = createHash(algo);
const stream = fs.createReadStream(path);
Expand All @@ -65,6 +66,7 @@ function hashFile(path, options) {
}

const download = async (url, dest) => {
console.info(chalk.blue(`=> Start to download from ${url} to ${dest}`));
return axios
.get(url, { responseType: "stream" })
.then((response) => {
Expand All @@ -82,13 +84,28 @@ const download = async (url, dest) => {
progressBar.tick(chunk.length);
});

response.data.pipe(fs.createWriteStream(dest)).on("close", () => {
response.data.pipe(fs.createWriteStream(dest)).on("close", async () => {
console.info(chalk.green(`✅ Model ${model} downloaded successfully`));
process.exit(0);
const hash = await hashFile(path.join(dir, model), { algo: "sha1" });
if (hash === sha) {
console.info(chalk.green(`✅ Model ${model} valid`));
process.exit(0);
} else {
console.error(
chalk.red(
`❌ Model ${model} not valid, please try again using command \`yarn workspace enjoy download-whisper-model\``
)
);
process.exit(1);
}
});
})
.catch((err) => {
console.error(chalk.red(`❌ Error: ${err}`));
console.error(
chalk.red(
`❌ Failed to download ${url}: ${err}.\nPlease try again using command \`yarn workspace enjoy download-whisper-model\``
)
);
process.exit(1);
});
};
Expand Down
15 changes: 10 additions & 5 deletions enjoy/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,36 @@ export const WHISPER_MODELS_OPTIONS = [
{
type: "tiny",
name: "ggml-tiny.en.bin",
size: "77.7 MB",
size: "75 MB",
sha: "c78c86eb1a8faa21b369bcd33207cc90d64ae9df",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin",
},
{
type: "base",
name: "ggml-base.en.bin",
size: "148 MB",
size: "142 MB",
sha: "137c40403d78fd54d454da0f9bd998f78703390c",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin",
},
{
type: "small",
name: "ggml-small.en.bin",
size: "488 MB",
size: "466 MB",
sha: "db8a495a91d927739e50b3fc1cc4c6b8f6c2d022",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin",
},
{
type: "medium",
name: "ggml-medium.en.bin",
size: "1.53 GB",
size: "1.5 GB",
sha: "8c30f0e44ce9560643ebd10bbe50cd20eafd3723",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin",
},
{
type: "large",
name: "ggml-large-v3.bin",
size: "3.09 GB",
size: "2.9 GB",
sha: "ad82bf6a9043ceed055076d0fd39f5f186ff8062",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin",
},
];
Expand Down
39 changes: 21 additions & 18 deletions enjoy/src/main/whisper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const logger = log.scope("whisper");

class Whipser {
private binMain: string;
private defaultModel: string;
private bundledModelsDir: string;
public config: WhisperConfigType;

constructor(config?: WhisperConfigType) {
Expand All @@ -20,13 +20,7 @@ class Whipser {
"whisper",
"main"
);
this.defaultModel = path.join(
__dirname,
"lib",
"whisper",
"models",
"ggml-base.en-q5_1.bin"
);
this.bundledModelsDir = path.join(__dirname, "lib", "whisper", "models");
if (fs.existsSync(customWhisperPath)) {
this.binMain = customWhisperPath;
} else {
Expand All @@ -36,23 +30,32 @@ class Whipser {

currentModel() {
if (!this.config.availableModels) return;
if (!this.config.model) {
const model = this.config.availableModels[0];
settings.setSync("whisper.model", this.config.availableModels[0].name);
return model.savePath;

let model: WhisperConfigType["availableModels"][0];
if (this.config.model) {
model = (this.config.availableModels || []).find(
(m) => m.name === this.config.model
);
}
if (!model) {
model = this.config.availableModels[0];
}

return (this.config.availableModels || []).find(
(m) => m.name === this.config.model
)?.savePath;
settings.setSync("whisper.model", model.name);
return model.savePath;
}

async initialize() {
const bundleModels = fs.readdirSync(this.bundledModelsDir);

const dir = path.join(settings.libraryPath(), "whisper", "models");
fs.ensureDirSync(dir);
const files = fs.readdirSync(dir);

const availableModelFiles = bundleModels.concat(files);

const models = [];
for (const file of files) {
for (const file of availableModelFiles) {
const model = WHISPER_MODELS_OPTIONS.find((m) => m.name == file);
if (!model) continue;

Expand Down Expand Up @@ -102,7 +105,7 @@ class Whipser {
async check() {
await this.initialize();

const model = this.currentModel() || this.defaultModel;
const model = this.currentModel();

const sampleFile = path.join(__dirname, "samples", "jfk.wav");
const tmpDir = settings.cachePath();
Expand Down Expand Up @@ -169,7 +172,7 @@ class Whipser {
throw new Error("No file or blob provided");
}

const model = this.currentModel() || this.defaultModel;
const model = this.currentModel();

if (blob) {
const format = blob.type.split("/")[1];
Expand Down

0 comments on commit fdc3c80

Please sign in to comment.