diff --git a/package.json b/package.json
index fdee578b4..c0e37112a 100644
--- a/package.json
+++ b/package.json
@@ -26,7 +26,8 @@
"lint": "eslint '**/*.ts' '**/*.tsx'",
"lint:fix": "eslint --fix '**/*.ts' '**/*.tsx'",
"purge-js": "rm `find . -name '*.js'|grep -v node_modules`",
- "test:mock": "FORCE_BUILD=true ts-node lib/notion/_mock/build-mock-data.tsx"
+ "test:mock": "FORCE_BUILD=true ts-node lib/notion/_mock/build-mock-data.tsx",
+ "dev-cleanup": "git clean -fd && find . -name '*.js.map' -type f -delete"
},
"license": "MIT",
"dependencies": {
@@ -97,4 +98,4 @@
"ts-node": "^10.9.1",
"typescript": "^5.2.2"
}
-}
+}
\ No newline at end of file
diff --git a/src/lib/anki/zip.tsx b/src/lib/anki/zip.tsx
deleted file mode 100644
index 7a30f6b3a..000000000
--- a/src/lib/anki/zip.tsx
+++ /dev/null
@@ -1,82 +0,0 @@
-import { strFromU8, unzipSync } from 'fflate';
-import { Body } from 'aws-sdk/clients/s3';
-import { renderToStaticMarkup } from 'react-dom/server';
-import { getUploadLimits } from '../misc/getUploadLimits';
-import { isHTMLFile, isMarkdownFile, isPDFFile } from '../storage/checks';
-
-interface File {
- name: string;
- contents?: Body | string;
-}
-
-class ZipHandler {
- fileNames: string[];
- files: File[];
- zipFileCount: number;
- maxZipFiles: number;
-
- constructor(maxNestedZipFiles: number) {
- this.fileNames = [];
- this.files = [];
- this.zipFileCount = 0;
- this.maxZipFiles = maxNestedZipFiles;
- }
-
- build(zipData: Uint8Array, paying: boolean) {
- const size = Buffer.byteLength(zipData);
- const limits = getUploadLimits(paying);
-
- if (size > limits.fileSize) {
- throw new Error(
- renderToStaticMarkup(
- <>
- Your upload is too big, there is a max of {size} / $
- {limits.fileSize} currently.{' '}
- Become a patron to remove
- default limit.
- </>
- )
- );
- }
-
- this.processZip(zipData);
- }
-
- private processZip(zipData: Uint8Array) {
- if (this.zipFileCount >= this.maxZipFiles) {
- throw new Error('Too many zip files in the upload.');
- }
-
- const loadedZip = unzipSync(zipData, {
- filter(file) {
- return !file.name.endsWith('/');
- },
- });
-
- for (const name in loadedZip) {
- const file = loadedZip[name];
- let contents = file;
-
- if (name.includes('__MACOSX/') || isPDFFile(name)) {
- continue;
- }
-
- if (name.endsWith('.zip')) {
- this.zipFileCount++;
- this.processZip(file);
- } else if ((isHTMLFile(name) || isMarkdownFile(name)) && contents) {
- this.files.push({ name, contents: strFromU8(file) });
- } else if (contents) {
- this.files.push({ name, contents });
- }
- }
-
- this.fileNames = this.files.map((file) => file.name);
- }
-
- getFileNames() {
- return this.fileNames;
- }
-}
-
-export { ZipHandler, File };
diff --git a/src/lib/parser/DeckParser.ts b/src/lib/parser/DeckParser.ts
index 21425d791..22ad76d87 100644
--- a/src/lib/parser/DeckParser.ts
+++ b/src/lib/parser/DeckParser.ts
@@ -2,7 +2,7 @@ import cheerio from 'cheerio';
import preserveNewlinesIfApplicable from '../../services/NotionService/helpers/preserveNewlinesIfApplicable';
import sanitizeTags from '../anki/sanitizeTags';
-import { File } from '../anki/zip';
+import { File } from '../zip/zip';
import Deck from './Deck';
import Note from './Note';
import Settings from './Settings';
diff --git a/src/lib/parser/experimental/FallbackParser.ts b/src/lib/parser/experimental/FallbackParser.ts
index 5e8821496..ffc69fa79 100644
--- a/src/lib/parser/experimental/FallbackParser.ts
+++ b/src/lib/parser/experimental/FallbackParser.ts
@@ -1,6 +1,6 @@
import cheerio from 'cheerio';
-import { File } from '../../anki/zip';
+import { File } from '../../zip/zip';
import {
isHTMLFile,
isMarkdownFile,
diff --git a/src/lib/parser/exporters/embedFile.ts b/src/lib/parser/exporters/embedFile.ts
index c13d1f29e..3b78687ee 100644
--- a/src/lib/parser/exporters/embedFile.ts
+++ b/src/lib/parser/exporters/embedFile.ts
@@ -1,4 +1,4 @@
-import { File } from '../../anki/zip';
+import { File } from '../../zip/zip';
import { SuffixFrom } from '../../misc/file';
import getUniqueFileName from '../../misc/getUniqueFileName';
import CustomExporter from './CustomExporter';
diff --git a/src/lib/parser/getFileContents.ts b/src/lib/parser/getFileContents.ts
index da9c4214a..fe3d45870 100644
--- a/src/lib/parser/getFileContents.ts
+++ b/src/lib/parser/getFileContents.ts
@@ -1,6 +1,6 @@
import { isHTMLFile, isMarkdownFile } from '../storage/checks';
import { markdownToHTML } from '../markdown';
-import { File } from '../anki/zip';
+import { File } from '../zip/zip';
export function getFileContents(file: File | undefined, convertToHTML = true) {
const contents = file?.contents;
diff --git a/src/lib/storage/checks.ts b/src/lib/storage/checks.ts
index cb178b8a9..f5476fb27 100644
--- a/src/lib/storage/checks.ts
+++ b/src/lib/storage/checks.ts
@@ -21,4 +21,4 @@ export const isImageFileEmbedable = (url: string) =>
export const isCSVFile = (fileName: string) => /.csv$/i.exec(fileName);
-export const isPDFFile = (fileName: string) => /.pdf$/i.exec(fileName);
+export const isPDFFile = (fileName: string) => /.pdf$/i.exec(fileName);
\ No newline at end of file
diff --git a/src/lib/storage/types.ts b/src/lib/storage/types.ts
index 2810cbfff..bec930c3d 100644
--- a/src/lib/storage/types.ts
+++ b/src/lib/storage/types.ts
@@ -1,4 +1,4 @@
-import { File } from '../anki/zip';
+import { File } from '../zip/zip';
// Multer types
export interface UploadedFile extends Express.Multer.File {
diff --git a/src/lib/zip/fallback/decompress.ts b/src/lib/zip/fallback/decompress.ts
new file mode 100644
index 000000000..31d4873f5
--- /dev/null
+++ b/src/lib/zip/fallback/decompress.ts
@@ -0,0 +1,8 @@
+import { unpack } from './unpack';
+import { File } from './types';
+import { writeFile } from './writeFile';
+
+export function decompress(byteArray: Uint8Array): Promise<File[]> {
+ const { workspace, filePath } = writeFile(byteArray);
+ return unpack(filePath, workspace.location);
+}
\ No newline at end of file
diff --git a/src/lib/zip/fallback/listFiles.ts b/src/lib/zip/fallback/listFiles.ts
new file mode 100644
index 000000000..0fc97152c
--- /dev/null
+++ b/src/lib/zip/fallback/listFiles.ts
@@ -0,0 +1,28 @@
+import fs from 'fs';
+
+import { File } from './types';
+export async function listFiles(workspace: string): Promise<File[]> {
+ const files: File[] = [];
+
+ async function explorePath(currentPath: string) {
+ const dir = await fs.promises.readdir(currentPath);
+ for (const fileName of dir) {
+ const filePath = `${currentPath}/${fileName}`;
+ const stats = await fs.promises.stat(filePath);
+
+ if (stats.isFile()) {
+ const buffer = await fs.promises.readFile(filePath);
+ files.push({
+ name: filePath,
+ contents: new Uint8Array(buffer),
+ });
+ } else if (stats.isDirectory()) {
+ await explorePath(filePath); // Recursively explore subdirectories
+ }
+ }
+ }
+
+ await explorePath(workspace);
+ console.log('files', files);
+ return files;
+}
diff --git a/src/lib/zip/fallback/processAndPrepareArchiveData.tsx b/src/lib/zip/fallback/processAndPrepareArchiveData.tsx
new file mode 100644
index 000000000..adadfe519
--- /dev/null
+++ b/src/lib/zip/fallback/processAndPrepareArchiveData.tsx
@@ -0,0 +1,42 @@
+import { renderToStaticMarkup } from 'react-dom/server';
+import { getUploadLimits } from '../../misc/getUploadLimits';
+import { decompress } from './decompress';
+import { isZipContentFileSupported } from '../../../usecases/uploads/isZipContentFileSupported';
+
+export const processAndPrepareArchiveData = async (
+ byteArray: Uint8Array,
+ isPatron = false
+) => {
+ const size = Buffer.byteLength(byteArray);
+ const limits = getUploadLimits(isPatron);
+
+ if (size > limits.fileSize) {
+ throw new Error(
+ renderToStaticMarkup(
+ <>
+ Your upload is too big, there is a max of {size} / ${limits.fileSize}{' '}
+ currently. Become a patron{' '}
+ to remove default limit or{' '}
+ login.
+ </>
+ )
+ );
+ }
+
+ const decompressedData = await decompress(byteArray);
+ const fileNames = decompressedData.map((z) => z.name);
+ const files = [];
+
+ for (const name of fileNames) {
+ const file = decompressedData.find((z) => z.name === name);
+ const contents = file?.contents;
+ if (isZipContentFileSupported(name) && contents) {
+ const s = new TextDecoder().decode(contents as Uint8Array);
+ files.push({ name, contents: s });
+ } else if (contents) {
+ files.push({ name, contents });
+ }
+ }
+
+ return files;
+};
\ No newline at end of file
diff --git a/src/lib/zip/fallback/types.ts b/src/lib/zip/fallback/types.ts
new file mode 100644
index 000000000..2074b7b23
--- /dev/null
+++ b/src/lib/zip/fallback/types.ts
@@ -0,0 +1,4 @@
+export interface File {
+ name: string;
+ contents?: string | Uint8Array;
+}
\ No newline at end of file
diff --git a/src/lib/zip/fallback/unpack.ts b/src/lib/zip/fallback/unpack.ts
new file mode 100644
index 000000000..25b3012fd
--- /dev/null
+++ b/src/lib/zip/fallback/unpack.ts
@@ -0,0 +1,24 @@
+import { spawn } from 'node:child_process';
+
+import { listFiles } from './listFiles';
+import { File } from './types';
+
+const TAR_PATH = '/usr/bin/bsdtar';
+
+export function unpack(filePath: string, workspace: string): Promise<File[]> {
+ return new Promise((resolve, reject) => {
+ const decompressProcess = spawn(TAR_PATH, ['xvf', filePath], {
+ cwd: workspace,
+ });
+ decompressProcess.stdout.on('data', (data) => {
+ console.log(`tar output: ${data}`);
+ });
+ decompressProcess.stderr.on('data', (data) => {
+ console.error(`tar error: ${data}`);
+ });
+ decompressProcess.on('close', () => {
+ // We are not reading the status code because we support partial extraction
+ listFiles(workspace).then(resolve).catch(reject);
+ });
+ });
+}
\ No newline at end of file
diff --git a/src/lib/zip/fallback/writeFile.ts b/src/lib/zip/fallback/writeFile.ts
new file mode 100644
index 000000000..f90c85dec
--- /dev/null
+++ b/src/lib/zip/fallback/writeFile.ts
@@ -0,0 +1,12 @@
+import { getRandomUUID } from '../../../shared/helpers/getRandomUUID';
+import Workspace from '../../parser/WorkSpace';
+import path from 'path';
+import fs from 'fs';
+
+export function writeFile(compressedData: Uint8Array) {
+ const uuid = getRandomUUID();
+ const workspace = new Workspace(true, 'fs');
+ const p = path.join(workspace.location, uuid);
+ fs.writeFileSync(p, compressedData);
+ return { workspace, filePath: p };
+}
\ No newline at end of file
diff --git a/src/lib/zip/zip.tsx b/src/lib/zip/zip.tsx
new file mode 100644
index 000000000..bf482ffa4
--- /dev/null
+++ b/src/lib/zip/zip.tsx
@@ -0,0 +1,90 @@
+import { strFromU8, unzipSync } from 'fflate';
+import { Body } from 'aws-sdk/clients/s3';
+import { renderToStaticMarkup } from 'react-dom/server';
+import { getUploadLimits } from '../misc/getUploadLimits';
+import { isHTMLFile, isMarkdownFile, isPDFFile } from '../storage/checks';
+import { processAndPrepareArchiveData } from './fallback/processAndPrepareArchiveData';
+
+interface File {
+ name: string;
+ contents?: Body | string;
+}
+
+class ZipHandler {
+ files: File[];
+ zipFileCount: number;
+ maxZipFiles: number;
+
+ constructor(maxNestedZipFiles: number) {
+ this.files = [];
+ this.zipFileCount = 0;
+ this.maxZipFiles = maxNestedZipFiles;
+ }
+
+ async build(zipData: Uint8Array, paying: boolean) {
+ const size = Buffer.byteLength(zipData);
+ const limits = getUploadLimits(paying);
+
+ if (size > limits.fileSize) {
+ throw new Error(
+ renderToStaticMarkup(
+ <>
+ Your upload is too big, there is a max of {size} / $
+ {limits.fileSize} currently.{' '}
+ Become a patron to remove
+ default limit.
+ </>
+ )
+ );
+ }
+
+ await this.processZip(zipData, paying);
+ }
+
+ private async processZip(zipData: Uint8Array, paying: boolean) {
+ if (this.zipFileCount >= this.maxZipFiles) {
+ throw new Error('Too many zip files in the upload.');
+ }
+
+ try {
+ const loadedZip = unzipSync(zipData, {
+ filter(file) {
+ return !file.name.endsWith('/');
+ },
+ });
+
+ for (const name in loadedZip) {
+ const file = loadedZip[name];
+ const contents = file;
+
+ if (name.includes('__MACOSX/') || isPDFFile(name)) {
+ continue;
+ }
+
+ if (name.endsWith('.zip')) {
+ this.zipFileCount++;
+ await this.processZip(file, paying);
+ } else if ((isHTMLFile(name) || isMarkdownFile(name)) && contents) {
+ this.files.push({ name, contents: strFromU8(file) });
+ } else if (contents) {
+ this.files.push({ name, contents });
+ }
+ }
+
+ } catch (error: unknown) {
+ if ((error as { code?: number }).code === 13) {
+ const foundFiles = await processAndPrepareArchiveData(zipData, paying);
+ this.files.push(...foundFiles);
+ console.log('files', this.files);
+ } else {
+ throw error;
+ }
+ }
+ }
+
+ getFileNames() {
+ return this.files.map((file) => file.name);
+ }
+}
+
+export { ZipHandler, File };
diff --git a/src/usecases/uploads/getPackagesFromZip.ts b/src/usecases/uploads/getPackagesFromZip.ts
index 30bb4fccc..ce41888a4 100644
--- a/src/usecases/uploads/getPackagesFromZip.ts
+++ b/src/usecases/uploads/getPackagesFromZip.ts
@@ -1,6 +1,6 @@
import { Body } from 'aws-sdk/clients/s3';
import Settings from '../../lib/parser/Settings';
-import { ZipHandler } from '../../lib/anki/zip';
+import { ZipHandler } from '../../lib/zip/zip';
import { PrepareDeck } from '../../lib/parser/PrepareDeck';
import Package from '../../lib/parser/Package';
import { checkFlashcardsLimits } from '../../lib/User/checkFlashcardsLimits';
@@ -15,11 +15,7 @@ import Workspace from '../../lib/parser/WorkSpace';
import { allowPDFUpload } from './allowPDFUpload';
import { getMaxUploadCount } from '../../lib/misc/getMaxUploadCount';
-export const isFileSupported = (filename: string) =>
- isHTMLFile(filename) ??
- isMarkdownFile(filename) ??
- isPlainText(filename) ??
- isCSVFile(filename);
+import { isZipContentFileSupported } from './isZipContentFileSupported';
export const getPackagesFromZip = async (
fileContents: Body | undefined,
@@ -34,14 +30,17 @@ export const getPackagesFromZip = async (
return { packages: [] };
}
- zipHandler.build(fileContents as Uint8Array, paying);
+ await zipHandler.build(fileContents as Uint8Array, paying);
const fileNames = zipHandler.getFileNames();
let cardCount = 0;
for (const fileName of fileNames) {
+ /**
+ * XXX: Should we also support files without extensions?
+ */
if (
- isFileSupported(fileName) ||
+ isZipContentFileSupported(fileName) ||
allowPDFUpload(fileName, paying, settings.vertexAIPDFQuestions)
) {
const deck = await PrepareDeck({
diff --git a/src/usecases/uploads/isZipContentFileSupported.ts b/src/usecases/uploads/isZipContentFileSupported.ts
new file mode 100644
index 000000000..94e804fc9
--- /dev/null
+++ b/src/usecases/uploads/isZipContentFileSupported.ts
@@ -0,0 +1,10 @@
+import { isHTMLFile, isMarkdownFile, isPlainText, isCSVFile } from '../../lib/storage/checks';
+
+/**
+ * XXX: Should we also support files without extensions?
+ */
+export const isZipContentFileSupported = (filename: string) =>
+ isHTMLFile(filename) ??
+ isMarkdownFile(filename) ??
+ isPlainText(filename) ??
+ isCSVFile(filename);
diff --git a/src/usecases/uploads/worker.ts b/src/usecases/uploads/worker.ts
index b7313cff0..7134b0bcb 100644
--- a/src/usecases/uploads/worker.ts
+++ b/src/usecases/uploads/worker.ts
@@ -5,9 +5,10 @@ import Package from '../../lib/parser/Package';
import fs from 'fs';
import { PrepareDeck } from '../../lib/parser/PrepareDeck';
import { isZIPFile } from '../../lib/storage/checks';
-import { getPackagesFromZip, isFileSupported } from './getPackagesFromZip';
+import { getPackagesFromZip } from './getPackagesFromZip';
import Workspace from '../../lib/parser/WorkSpace';
import { allowPDFUpload } from './allowPDFUpload';
+import { isZipContentFileSupported } from './isZipContentFileSupported';
interface GenerationData {
paying: boolean;
@@ -29,7 +30,7 @@ function doGenerationWork(data: GenerationData) {
const key = file.key;
if (
- isFileSupported(filename) ||
+ isZipContentFileSupported(filename) ||
allowPDFUpload(filename, paying, settings.vertexAIPDFQuestions)
) {
const d = await PrepareDeck({