diff --git a/package.json b/package.json index fdee578b4..c0e37112a 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,8 @@ "lint": "eslint '**/*.ts' '**/*.tsx'", "lint:fix": "eslint --fix '**/*.ts' '**/*.tsx'", "purge-js": "rm `find . -name '*.js'|grep -v node_modules`", - "test:mock": "FORCE_BUILD=true ts-node lib/notion/_mock/build-mock-data.tsx" + "test:mock": "FORCE_BUILD=true ts-node lib/notion/_mock/build-mock-data.tsx", + "dev-cleanup": "git clean -fd && find . -name '*.js.map' -type f -delete" }, "license": "MIT", "dependencies": { @@ -97,4 +98,4 @@ "ts-node": "^10.9.1", "typescript": "^5.2.2" } -} +} \ No newline at end of file diff --git a/src/lib/anki/zip.tsx b/src/lib/anki/zip.tsx deleted file mode 100644 index 7a30f6b3a..000000000 --- a/src/lib/anki/zip.tsx +++ /dev/null @@ -1,82 +0,0 @@ -import { strFromU8, unzipSync } from 'fflate'; -import { Body } from 'aws-sdk/clients/s3'; -import { renderToStaticMarkup } from 'react-dom/server'; -import { getUploadLimits } from '../misc/getUploadLimits'; -import { isHTMLFile, isMarkdownFile, isPDFFile } from '../storage/checks'; - -interface File { - name: string; - contents?: Body | string; -} - -class ZipHandler { - fileNames: string[]; - files: File[]; - zipFileCount: number; - maxZipFiles: number; - - constructor(maxNestedZipFiles: number) { - this.fileNames = []; - this.files = []; - this.zipFileCount = 0; - this.maxZipFiles = maxNestedZipFiles; - } - - build(zipData: Uint8Array, paying: boolean) { - const size = Buffer.byteLength(zipData); - const limits = getUploadLimits(paying); - - if (size > limits.fileSize) { - throw new Error( - renderToStaticMarkup( - <> - Your upload is too big, there is a max of {size} / $ - {limits.fileSize} currently.{' '} - Become a patron to remove - default limit. 
- - ) - ); - } - - this.processZip(zipData); - } - - private processZip(zipData: Uint8Array) { - if (this.zipFileCount >= this.maxZipFiles) { - throw new Error('Too many zip files in the upload.'); - } - - const loadedZip = unzipSync(zipData, { - filter(file) { - return !file.name.endsWith('/'); - }, - }); - - for (const name in loadedZip) { - const file = loadedZip[name]; - let contents = file; - - if (name.includes('__MACOSX/') || isPDFFile(name)) { - continue; - } - - if (name.endsWith('.zip')) { - this.zipFileCount++; - this.processZip(file); - } else if ((isHTMLFile(name) || isMarkdownFile(name)) && contents) { - this.files.push({ name, contents: strFromU8(file) }); - } else if (contents) { - this.files.push({ name, contents }); - } - } - - this.fileNames = this.files.map((file) => file.name); - } - - getFileNames() { - return this.fileNames; - } -} - -export { ZipHandler, File }; diff --git a/src/lib/parser/DeckParser.ts b/src/lib/parser/DeckParser.ts index 21425d791..22ad76d87 100644 --- a/src/lib/parser/DeckParser.ts +++ b/src/lib/parser/DeckParser.ts @@ -2,7 +2,7 @@ import cheerio from 'cheerio'; import preserveNewlinesIfApplicable from '../../services/NotionService/helpers/preserveNewlinesIfApplicable'; import sanitizeTags from '../anki/sanitizeTags'; -import { File } from '../anki/zip'; +import { File } from '../zip/zip'; import Deck from './Deck'; import Note from './Note'; import Settings from './Settings'; diff --git a/src/lib/parser/experimental/FallbackParser.ts b/src/lib/parser/experimental/FallbackParser.ts index 5e8821496..ffc69fa79 100644 --- a/src/lib/parser/experimental/FallbackParser.ts +++ b/src/lib/parser/experimental/FallbackParser.ts @@ -1,6 +1,6 @@ import cheerio from 'cheerio'; -import { File } from '../../anki/zip'; +import { File } from '../../zip/zip'; import { isHTMLFile, isMarkdownFile, diff --git a/src/lib/parser/exporters/embedFile.ts b/src/lib/parser/exporters/embedFile.ts index c13d1f29e..3b78687ee 100644 --- 
a/src/lib/parser/exporters/embedFile.ts +++ b/src/lib/parser/exporters/embedFile.ts @@ -1,4 +1,4 @@ -import { File } from '../../anki/zip'; +import { File } from '../../zip/zip'; import { SuffixFrom } from '../../misc/file'; import getUniqueFileName from '../../misc/getUniqueFileName'; import CustomExporter from './CustomExporter'; diff --git a/src/lib/parser/getFileContents.ts b/src/lib/parser/getFileContents.ts index da9c4214a..fe3d45870 100644 --- a/src/lib/parser/getFileContents.ts +++ b/src/lib/parser/getFileContents.ts @@ -1,6 +1,6 @@ import { isHTMLFile, isMarkdownFile } from '../storage/checks'; import { markdownToHTML } from '../markdown'; -import { File } from '../anki/zip'; +import { File } from '../zip/zip'; export function getFileContents(file: File | undefined, convertToHTML = true) { const contents = file?.contents; diff --git a/src/lib/storage/checks.ts b/src/lib/storage/checks.ts index cb178b8a9..f5476fb27 100644 --- a/src/lib/storage/checks.ts +++ b/src/lib/storage/checks.ts @@ -21,4 +21,4 @@ export const isImageFileEmbedable = (url: string) => export const isCSVFile = (fileName: string) => /.csv$/i.exec(fileName); -export const isPDFFile = (fileName: string) => /.pdf$/i.exec(fileName); +export const isPDFFile = (fileName: string) => /.pdf$/i.exec(fileName); \ No newline at end of file diff --git a/src/lib/storage/types.ts b/src/lib/storage/types.ts index 2810cbfff..bec930c3d 100644 --- a/src/lib/storage/types.ts +++ b/src/lib/storage/types.ts @@ -1,4 +1,4 @@ -import { File } from '../anki/zip'; +import { File } from '../zip/zip'; // Multer types export interface UploadedFile extends Express.Multer.File { diff --git a/src/lib/zip/fallback/decompress.ts b/src/lib/zip/fallback/decompress.ts new file mode 100644 index 000000000..31d4873f5 --- /dev/null +++ b/src/lib/zip/fallback/decompress.ts @@ -0,0 +1,8 @@ +import { unpack } from './unpack'; +import { File } from './types'; +import { writeFile } from './writeFile'; + +export function 
decompress(byteArray: Uint8Array): Promise<File[]> { + const { workspace, filePath } = writeFile(byteArray); + return unpack(filePath, workspace.location); +} \ No newline at end of file diff --git a/src/lib/zip/fallback/listFiles.ts b/src/lib/zip/fallback/listFiles.ts new file mode 100644 index 000000000..0fc97152c --- /dev/null +++ b/src/lib/zip/fallback/listFiles.ts @@ -0,0 +1,28 @@ +import fs from 'fs'; + +import { File } from './types'; +export async function listFiles(workspace: string) { + const files: File[] = []; + + async function explorePath(currentPath: string) { + const dir = await fs.promises.readdir(currentPath); + for (const fileName of dir) { + const filePath = `${currentPath}/${fileName}`; + const stats = await fs.promises.stat(filePath); + + if (stats.isFile()) { + const buffer = await fs.promises.readFile(filePath); + files.push({ + name: filePath, + contents: new Uint8Array(buffer), + }); + } else if (stats.isDirectory()) { + await explorePath(filePath); // Recursively explore subdirectories + } + } + } + + await explorePath(workspace); + console.log('files', files); + return files; +} diff --git a/src/lib/zip/fallback/processAndPrepareArchiveData.tsx b/src/lib/zip/fallback/processAndPrepareArchiveData.tsx new file mode 100644 index 000000000..adadfe519 --- /dev/null +++ b/src/lib/zip/fallback/processAndPrepareArchiveData.tsx @@ -0,0 +1,42 @@ +import { renderToStaticMarkup } from 'react-dom/server'; +import { getUploadLimits } from '../../misc/getUploadLimits'; +import { decompress } from './decompress'; +import { isZipContentFileSupported } from '../../../usecases/uploads/isZipContentFileSupported'; + +export const processAndPrepareArchiveData = async ( + byteArray: Uint8Array, + isPatron: boolean = false +) => { + const size = Buffer.byteLength(byteArray); + const limits = getUploadLimits(isPatron); + + if (size > limits.fileSize) { + throw new Error( + renderToStaticMarkup( + <> + Your upload is too big, there is a max of {size} / 
${limits.fileSize}{' '} + currently. Become a patron{' '} + to remove default limit or{' '} + login. + + ) + ); + } + + const decompressedData = await decompress(byteArray); + const fileNames = decompressedData.map((z) => z.name); + const files = []; + + for (const name of fileNames) { + const file = decompressedData.find((z) => z.name === name); + let contents = file?.contents; + if (isZipContentFileSupported(name) && contents) { + const s = new TextDecoder().decode(contents as Uint8Array); + files.push({ name, contents: s }); + } else if (contents) { + files.push({ name, contents }); + } + } + + return files; +}; \ No newline at end of file diff --git a/src/lib/zip/fallback/types.ts b/src/lib/zip/fallback/types.ts new file mode 100644 index 000000000..2074b7b23 --- /dev/null +++ b/src/lib/zip/fallback/types.ts @@ -0,0 +1,4 @@ +export interface File { + name: string; + contents?: string | Uint8Array; + } \ No newline at end of file diff --git a/src/lib/zip/fallback/unpack.ts b/src/lib/zip/fallback/unpack.ts new file mode 100644 index 000000000..25b3012fd --- /dev/null +++ b/src/lib/zip/fallback/unpack.ts @@ -0,0 +1,24 @@ +import { spawn } from 'node:child_process'; + +import { listFiles } from './listFiles'; +import { File } from './types'; + +const TAR_PATH = '/usr/bin/bsdtar'; + +export function unpack(filePath: string, workspace: string): Promise<File[]> { + return new Promise((resolve, reject) => { + const decompressProcess = spawn(TAR_PATH, ['xvf', filePath], { + cwd: workspace, + }); + decompressProcess.stdout.on('data', (data) => { + console.log(`tar output: ${data}`); + }); + decompressProcess.stderr.on('data', (data) => { + console.error(`tar error: ${data}`); + }); + decompressProcess.on('close', () => { + // We are not reading the status code because we support partial extraction + listFiles(workspace).then(resolve).catch(reject); + }); + }); +} \ No newline at end of file diff --git a/src/lib/zip/fallback/writeFile.ts b/src/lib/zip/fallback/writeFile.ts new 
file mode 100644 index 000000000..f90c85dec --- /dev/null +++ b/src/lib/zip/fallback/writeFile.ts @@ -0,0 +1,12 @@ +import { getRandomUUID } from '../../../shared/helpers/getRandomUUID'; +import Workspace from '../../parser/WorkSpace'; +import path from 'path'; +import fs from 'fs'; + +export function writeFile(compressedData: Uint8Array) { + const uuid = getRandomUUID(); + const workspace = new Workspace(true, 'fs'); + const p = path.join(workspace.location, uuid); + fs.writeFileSync(p, compressedData); + return { workspace, filePath: p }; +} \ No newline at end of file diff --git a/src/lib/zip/zip.tsx b/src/lib/zip/zip.tsx new file mode 100644 index 000000000..bf482ffa4 --- /dev/null +++ b/src/lib/zip/zip.tsx @@ -0,0 +1,90 @@ +import { strFromU8, unzipSync } from 'fflate'; +import { Body } from 'aws-sdk/clients/s3'; +import { renderToStaticMarkup } from 'react-dom/server'; +import { getUploadLimits } from '../misc/getUploadLimits'; +import { isHTMLFile, isMarkdownFile, isPDFFile } from '../storage/checks'; +import { processAndPrepareArchiveData } from './fallback/processAndPrepareArchiveData'; + +interface File { + name: string; + contents?: Body | string; +} + +class ZipHandler { + files: File[]; + zipFileCount: number; + maxZipFiles: number; + + constructor(maxNestedZipFiles: number) { + this.files = []; + this.zipFileCount = 0; + this.maxZipFiles = maxNestedZipFiles; + } + + async build(zipData: Uint8Array, paying: boolean) { + const size = Buffer.byteLength(zipData); + const limits = getUploadLimits(paying); + + if (size > limits.fileSize) { + throw new Error( + renderToStaticMarkup( + <> + Your upload is too big, there is a max of {size} / $ + {limits.fileSize} currently.{' '} + Become a patron to remove + default limit. 
+ + ) + ); + } + + await this.processZip(zipData, paying); + } + + private async processZip(zipData: Uint8Array, paying: boolean) { + if (this.zipFileCount >= this.maxZipFiles) { + throw new Error('Too many zip files in the upload.'); + } + + try { + const loadedZip = unzipSync(zipData, { + filter(file) { + return !file.name.endsWith('/'); + }, + }); + + for (const name in loadedZip) { + const file = loadedZip[name]; + let contents = file; + + if (name.includes('__MACOSX/') || isPDFFile(name)) { + continue; + } + + if (name.endsWith('.zip')) { + this.zipFileCount++; + await this.processZip(file, paying); + } else if ((isHTMLFile(name) || isMarkdownFile(name)) && contents) { + this.files.push({ name, contents: strFromU8(file) }); + } else if (contents) { + this.files.push({ name, contents }); + } + } + + } catch (error: unknown) { + if ((error as { code?: number }).code === 13) { + const foundFiles = await processAndPrepareArchiveData(zipData, paying); + this.files.push(...foundFiles); + console.log('files', this.files); + } else { + throw error; + } + } + } + + getFileNames() { + return this.files.map((file) => file.name); + } +} + +export { ZipHandler, File }; diff --git a/src/usecases/uploads/getPackagesFromZip.ts b/src/usecases/uploads/getPackagesFromZip.ts index 30bb4fccc..ce41888a4 100644 --- a/src/usecases/uploads/getPackagesFromZip.ts +++ b/src/usecases/uploads/getPackagesFromZip.ts @@ -1,6 +1,6 @@ import { Body } from 'aws-sdk/clients/s3'; import Settings from '../../lib/parser/Settings'; -import { ZipHandler } from '../../lib/anki/zip'; +import { ZipHandler } from '../../lib/zip/zip'; import { PrepareDeck } from '../../lib/parser/PrepareDeck'; import Package from '../../lib/parser/Package'; import { checkFlashcardsLimits } from '../../lib/User/checkFlashcardsLimits'; @@ -15,11 +15,7 @@ import Workspace from '../../lib/parser/WorkSpace'; import { allowPDFUpload } from './allowPDFUpload'; import { getMaxUploadCount } from '../../lib/misc/getMaxUploadCount'; 
-export const isFileSupported = (filename: string) => - isHTMLFile(filename) ?? - isMarkdownFile(filename) ?? - isPlainText(filename) ?? - isCSVFile(filename); +import { isZipContentFileSupported } from './isZipContentFileSupported'; export const getPackagesFromZip = async ( fileContents: Body | undefined, @@ -34,14 +30,17 @@ export const getPackagesFromZip = async ( return { packages: [] }; } - zipHandler.build(fileContents as Uint8Array, paying); + await zipHandler.build(fileContents as Uint8Array, paying); const fileNames = zipHandler.getFileNames(); let cardCount = 0; for (const fileName of fileNames) { + /** + * XXX: Should we also support files without extensions? + */ if ( - isFileSupported(fileName) || + isZipContentFileSupported(fileName) || allowPDFUpload(fileName, paying, settings.vertexAIPDFQuestions) ) { const deck = await PrepareDeck({ diff --git a/src/usecases/uploads/isZipContentFileSupported.ts b/src/usecases/uploads/isZipContentFileSupported.ts new file mode 100644 index 000000000..94e804fc9 --- /dev/null +++ b/src/usecases/uploads/isZipContentFileSupported.ts @@ -0,0 +1,10 @@ +import { isHTMLFile, isMarkdownFile, isPlainText, isCSVFile } from "../../lib/storage/checks"; + +/** + * XXX: Should we also support files without extensions? + */ +export const isZipContentFileSupported = (filename: string) => + isHTMLFile(filename) ?? + isMarkdownFile(filename) ?? + isPlainText(filename) ?? 
+ isCSVFile(filename); diff --git a/src/usecases/uploads/worker.ts b/src/usecases/uploads/worker.ts index b7313cff0..7134b0bcb 100644 --- a/src/usecases/uploads/worker.ts +++ b/src/usecases/uploads/worker.ts @@ -5,9 +5,10 @@ import Package from '../../lib/parser/Package'; import fs from 'fs'; import { PrepareDeck } from '../../lib/parser/PrepareDeck'; import { isZIPFile } from '../../lib/storage/checks'; -import { getPackagesFromZip, isFileSupported } from './getPackagesFromZip'; +import { getPackagesFromZip } from './getPackagesFromZip'; import Workspace from '../../lib/parser/WorkSpace'; import { allowPDFUpload } from './allowPDFUpload'; +import { isZipContentFileSupported } from './isZipContentFileSupported'; interface GenerationData { paying: boolean; @@ -29,7 +30,7 @@ function doGenerationWork(data: GenerationData) { const key = file.key; if ( - isFileSupported(filename) || + isZipContentFileSupported(filename) || allowPDFUpload(filename, paying, settings.vertexAIPDFQuestions) ) { const d = await PrepareDeck({