#!/bin/bash
#
# Batch-download every URL listed in a text file (one URL per line) into the
# current directory, naming each file after the last path segment of its URL.
#
# Usage:
#   bash scriptname.sh [url_list.txt]
#
# When no argument is given, the path of the URL list is read from stdin after
# a prompt (the original interactive behaviour). Progress messages and wget
# diagnostics go to download_log.txt, which is truncated at the start of a run.
# Exit status: 0 when every URL was downloaded or already present, 1 otherwise.

#######################################
# Derive the local file name for a URL.
# The query string and fragment are removed BEFORE the last path segment is
# taken, so a '/' inside the query (e.g. "?redirect=/a/b") cannot be mistaken
# for a path separator.
# Arguments: $1 - URL
# Outputs:   file name on stdout (empty when the URL ends with '/')
#######################################
url_to_filename() {
  local url=$1
  url=${url%%\?*}
  url=${url%%#*}
  printf '%s\n' "${url##*/}"
}

#######################################
# Download a single URL unless its target file already exists.
# wget -O creates the output file even when the transfer fails, so the data
# is written to "<name>.part" first and renamed only on success; otherwise a
# failed download would leave a stub that later runs treat as "already exists".
# Arguments: $1 - URL
#            $2 - log file that receives messages and wget's stderr
# Returns:   0 when downloaded or skipped, 1 on failure
#######################################
download_one() {
  local url=$1
  local log_file=$2
  local filename tmp_file

  filename=$(url_to_filename "$url")
  if [[ -z "$filename" ]]; then
    printf "无法从'%s'解析文件名,已跳过。\n" "$url" >> "$log_file"
    return 1
  fi

  if [[ -e "$filename" ]]; then
    printf "文件'%s'已存在,跳过下载。\n" "$filename" >> "$log_file"
    return 0
  fi

  tmp_file="${filename}.part"
  if wget --tries=3 --timeout=30 -nv -O "$tmp_file" -- "$url" 2>> "$log_file" \
    && mv -- "$tmp_file" "$filename" 2>> "$log_file"; then
    printf '下载完成并已重命名为: %s\n' "$filename" >> "$log_file"
    return 0
  fi

  # Remove the partial/empty file so the URL is retried on the next run.
  rm -f -- "$tmp_file"
  printf "下载'%s'失败,请检查URL是否正确或网络连接是否正常。\n" "$url" >> "$log_file"
  return 1
}

#######################################
# Entry point: resolve the URL list, reset the log, download every entry.
# Arguments: $1 - optional path of the URL list; prompted for when absent
# Returns:   0 when all entries succeeded, 1 otherwise
#######################################
main() {
  local filepath=${1:-}
  local log_file="download_log.txt"
  local url
  local failed=0

  if [[ -z "$filepath" ]]; then
    echo "请输入urltxt路径:"
    # -r keeps backslashes in the path literal; an EOF simply leaves the
    # variable empty, which the readability check below reports.
    read -r filepath || true
  fi

  # Validate before touching the log so a typo does not wipe the previous log.
  if [[ ! -r "$filepath" || -d "$filepath" ]]; then
    printf "错误: 找不到或无法读取url列表文件'%s'。\n" "$filepath" >&2
    return 1
  fi

  # Create the log file or empty an existing one.
  : > "$log_file"

  # '|| [[ -n $url ]]' keeps the last line when the list lacks a final newline.
  while read -r url || [[ -n "$url" ]]; do
    url=${url%$'\r'}               # tolerate lists saved with CRLF line endings
    [[ -n "$url" ]] || continue    # ignore blank lines
    download_one "$url" "$log_file" || failed=1
  done < "$filepath"

  echo "下载任务已完成,详细情况请查看$log_file"
  return "$failed"
}

main "$@"