Skip to content

Commit

Permalink
fix: resolve the issue with invalid zip data
Browse files Browse the repository at this point in the history
When the zip file cannot be unzipped, we attempt to use `tar` as a
fallback mechanism. After testing this for a while, I can confirm the
fallback is good enough, but it cannot replace the zip handler altogether
as was attempted before in:

- #1487
- #1495

Instead this approach will only use it if we receive the invalid zip
exception from fflate.
  • Loading branch information
aalemayhu committed Nov 24, 2024
1 parent cb34ff8 commit 8ddbd8f
Show file tree
Hide file tree
Showing 18 changed files with 237 additions and 100 deletions.
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
"lint": "eslint '**/*.ts' '**/*.tsx'",
"lint:fix": "eslint --fix '**/*.ts' '**/*.tsx'",
"purge-js": "rm `find . -name '*.js'|grep -v node_modules`",
"test:mock": "FORCE_BUILD=true ts-node lib/notion/_mock/build-mock-data.tsx"
"test:mock": "FORCE_BUILD=true ts-node lib/notion/_mock/build-mock-data.tsx",
"dev-cleanup": "git clean -fd && find . -name '*.js.map' -type f -delete"
},
"license": "MIT",
"dependencies": {
Expand Down Expand Up @@ -97,4 +98,4 @@
"ts-node": "^10.9.1",
"typescript": "^5.2.2"
}
}
}
82 changes: 0 additions & 82 deletions src/lib/anki/zip.tsx

This file was deleted.

2 changes: 1 addition & 1 deletion src/lib/parser/DeckParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import cheerio from 'cheerio';

import preserveNewlinesIfApplicable from '../../services/NotionService/helpers/preserveNewlinesIfApplicable';
import sanitizeTags from '../anki/sanitizeTags';
import { File } from '../anki/zip';
import { File } from '../zip/zip';
import Deck from './Deck';
import Note from './Note';
import Settings from './Settings';
Expand Down
2 changes: 1 addition & 1 deletion src/lib/parser/experimental/FallbackParser.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import cheerio from 'cheerio';

import { File } from '../../anki/zip';
import { File } from '../../zip/zip';
import {
isHTMLFile,
isMarkdownFile,
Expand Down
2 changes: 1 addition & 1 deletion src/lib/parser/exporters/embedFile.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { File } from '../../anki/zip';
import { File } from '../../zip/zip';
import { SuffixFrom } from '../../misc/file';
import getUniqueFileName from '../../misc/getUniqueFileName';
import CustomExporter from './CustomExporter';
Expand Down
2 changes: 1 addition & 1 deletion src/lib/parser/getFileContents.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { isHTMLFile, isMarkdownFile } from '../storage/checks';
import { markdownToHTML } from '../markdown';
import { File } from '../anki/zip';
import { File } from '../zip/zip';

export function getFileContents(file: File | undefined, convertToHTML = true) {
const contents = file?.contents;
Expand Down
2 changes: 1 addition & 1 deletion src/lib/storage/checks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ export const isImageFileEmbedable = (url: string) =>

/**
 * Tests whether a file name carries a `.csv` extension (case-insensitive).
 *
 * The dot is escaped so that only a literal `.csv` suffix matches; an
 * unescaped dot would also accept names such as `notcsv`.
 *
 * @param fileName - File name or path to inspect.
 * @returns The regex match (truthy) when the name ends with `.csv`, otherwise `null`.
 */
export const isCSVFile = (fileName: string) => /\.csv$/i.exec(fileName);

/**
 * Tests whether a file name carries a `.pdf` extension (case-insensitive).
 *
 * The dot is escaped so that only a literal `.pdf` suffix matches; an
 * unescaped dot would also accept names such as `notpdf`.
 *
 * @param fileName - File name or path to inspect.
 * @returns The regex match (truthy) when the name ends with `.pdf`, otherwise `null`.
 */
export const isPDFFile = (fileName: string) => /\.pdf$/i.exec(fileName);
/**
 * Tests whether a file name carries a `.pdf` extension (case-insensitive).
 *
 * The dot is escaped so that only a literal `.pdf` suffix matches; an
 * unescaped dot would also accept names such as `notpdf`.
 *
 * @param fileName - File name or path to inspect.
 * @returns The regex match (truthy) when the name ends with `.pdf`, otherwise `null`.
 */
export const isPDFFile = (fileName: string) => /\.pdf$/i.exec(fileName);

Check failure on line 24 in src/lib/storage/checks.ts

View workflow job for this annotation

GitHub Actions / build (20.18.0)

Insert `⏎`
2 changes: 1 addition & 1 deletion src/lib/storage/types.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { File } from '../anki/zip';
import { File } from '../zip/zip';

// Multer types
export interface UploadedFile extends Express.Multer.File {
Expand Down
8 changes: 8 additions & 0 deletions src/lib/zip/fallback/decompress.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { unpack } from './unpack';
import { File } from './types';
import { writeFile } from './writeFile';

/**
 * Extracts an archive held in memory by first persisting it to disk and then
 * handing the on-disk copy to the external unpacker.
 *
 * @param byteArray - Raw bytes of the archive.
 * @returns The files found in the workspace once extraction has finished.
 */
export function decompress(byteArray: Uint8Array): Promise<File[]> {
  const written = writeFile(byteArray);
  const extractionDir = written.workspace.location;
  return unpack(written.filePath, extractionDir);
}

Check failure on line 8 in src/lib/zip/fallback/decompress.ts

View workflow job for this annotation

GitHub Actions / build (20.18.0)

Insert `⏎`
28 changes: 28 additions & 0 deletions src/lib/zip/fallback/listFiles.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import fs from 'fs';

import { File } from './types';
/**
 * Recursively reads every regular file below `workspace` into memory.
 *
 * Entry types are taken from the directory listing itself, which does not
 * follow symbolic links. The workspace holds the output of extracting an
 * uploaded archive, so a symlink inside it could point anywhere on the host;
 * such entries (and any other non-file, non-directory entries) are skipped
 * instead of being read through.
 *
 * @param workspace - Directory to walk.
 * @returns One entry per regular file; `name` is the walked path
 *   (`workspace` joined to the relative path with `/`) and `contents` holds
 *   the file's bytes.
 */
export async function listFiles(workspace: string): Promise<File[]> {
  const files: File[] = [];

  async function explorePath(currentPath: string): Promise<void> {
    const entries = await fs.promises.readdir(currentPath, {
      withFileTypes: true,
    });

    for (const entry of entries) {
      const filePath = `${currentPath}/${entry.name}`;

      if (entry.isFile()) {
        const buffer = await fs.promises.readFile(filePath);
        files.push({
          name: filePath,
          contents: new Uint8Array(buffer),
        });
      } else if (entry.isDirectory()) {
        // Depth-first descent; results keep directory-listing order.
        await explorePath(filePath);
      }
    }
  }

  await explorePath(workspace);
  // Deliberately no logging of `files`: it would dump every file's contents.
  return files;
}
42 changes: 42 additions & 0 deletions src/lib/zip/fallback/processAndPrepareArchiveData.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { renderToStaticMarkup } from 'react-dom/server';
import { getUploadLimits } from '../../misc/getUploadLimits';
import { decompress } from './decompress';
import { isZipContentFileSupported } from '../../../usecases/uploads/isZipContentFileSupported';

/**
 * Fallback extraction path: enforces the upload size limit, extracts the
 * archive through `decompress`, and normalises the extracted entries.
 *
 * Entries accepted by `isZipContentFileSupported` are decoded to text with
 * `TextDecoder` (UTF-8 by default); every other entry keeps its raw contents.
 * Entries without contents are dropped.
 *
 * @param byteArray - Raw bytes of the uploaded archive.
 * @param isPatron - Selects which upload limits apply.
 * @returns The extracted files in the order `decompress` produced them.
 * @throws Error carrying an HTML message when the archive exceeds the size limit.
 */
export const processAndPrepareArchiveData = async (
  byteArray: Uint8Array,
  isPatron: boolean = false
) => {
  const size = Buffer.byteLength(byteArray);
  const limits = getUploadLimits(isPatron);

  if (size > limits.fileSize) {
    // NOTE(review): the `$` before the limit is rendered literally by JSX and
    // looks like a template-string leftover — confirm before changing the copy.
    throw new Error(
      renderToStaticMarkup(
        <>
          Your upload is too big, there is a max of {size} / ${limits.fileSize}{' '}
          currently. <a href="https://alemayhu.com/patreon">Become a patron</a>{' '}
          to remove default limit or{' '}
          <a href="https://2anki.net/login#login">login</a>.
        </>
      )
    );
  }

  const decompressedData = await decompress(byteArray);
  // A non-streaming decode resets decoder state, so one instance is reusable.
  const decoder = new TextDecoder();
  const files: Array<{ name: string; contents: string | Uint8Array }> = [];

  // Single pass over the entries; no per-name lookup back into the list.
  for (const { name, contents } of decompressedData) {
    if (!contents) {
      continue;
    }

    if (isZipContentFileSupported(name) && typeof contents !== 'string') {
      files.push({ name, contents: decoder.decode(contents) });
    } else {
      // Already text, or a type that is passed through as raw bytes.
      files.push({ name, contents });
    }
  }

  return files;
};

Check failure on line 42 in src/lib/zip/fallback/processAndPrepareArchiveData.tsx

View workflow job for this annotation

GitHub Actions / build (20.18.0)

Insert `⏎`
4 changes: 4 additions & 0 deletions src/lib/zip/fallback/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/**
 * A file produced by the fallback extraction path.
 */
export interface File {
  /** Name (path) identifying the file. */
  name: string;
  /** File contents, either as text or as raw bytes; may be absent. */
  contents?: string | Uint8Array;
}

Check failure on line 4 in src/lib/zip/fallback/types.ts

View workflow job for this annotation

GitHub Actions / build (20.18.0)

Replace `······}` with `}⏎`
24 changes: 24 additions & 0 deletions src/lib/zip/fallback/unpack.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { spawn } from 'node:child_process';

import { listFiles } from './listFiles';
import { File } from './types';

/** Absolute path of the external extractor used by the fallback. */
const TAR_PATH = '/usr/bin/bsdtar';

/**
 * Extracts the archive at `filePath` into `workspace` with the external
 * extractor and then lists what ended up in the workspace.
 *
 * The extractor's exit status is intentionally ignored so that a partially
 * extracted archive still yields the files that were recovered. Failure to
 * start the extractor at all (for example when the binary is missing) rejects
 * the returned promise.
 *
 * @param filePath - Path of the archive on disk.
 * @param workspace - Directory used as the extractor's working directory and
 *   listed afterwards.
 * @returns The files found in `workspace` after extraction.
 */
export function unpack(filePath: string, workspace: string): Promise<File[]> {
  return new Promise((resolve, reject) => {
    let spawnFailed = false;

    const decompressProcess = spawn(TAR_PATH, ['xvf', filePath], {
      cwd: workspace,
    });

    // Without an 'error' listener a failed spawn surfaces as an unhandled
    // 'error' event and takes the whole process down.
    decompressProcess.on('error', (error) => {
      spawnFailed = true;
      reject(error);
    });

    decompressProcess.stdout.on('data', (data) => {
      console.log(`tar output: ${data}`);
    });
    decompressProcess.stderr.on('data', (data) => {
      console.error(`tar error: ${data}`);
    });

    decompressProcess.on('close', () => {
      if (spawnFailed) {
        // Already rejected; nothing was extracted, so there is nothing to list.
        return;
      }
      // We are not reading the status code because we support partial extraction
      listFiles(workspace).then(resolve, reject);
    });
  });
}

Check failure on line 24 in src/lib/zip/fallback/unpack.ts

View workflow job for this annotation

GitHub Actions / build (20.18.0)

Insert `⏎`
12 changes: 12 additions & 0 deletions src/lib/zip/fallback/writeFile.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { getRandomUUID } from '../../../shared/helpers/getRandomUUID';
import Workspace from '../../parser/WorkSpace';
import path from 'path';
import fs from 'fs';

/**
 * Persists archive bytes to a uniquely named file inside a freshly
 * constructed workspace.
 *
 * @param compressedData - Raw bytes of the archive.
 * @returns The workspace together with the path of the file written into it.
 */
export function writeFile(compressedData: Uint8Array) {
  // A random UUID serves as the file name inside the workspace.
  const archiveName = getRandomUUID();
  const workspace = new Workspace(true, 'fs');
  const filePath = path.join(workspace.location, archiveName);

  fs.writeFileSync(filePath, compressedData);

  return { workspace, filePath };
}

Check failure on line 12 in src/lib/zip/fallback/writeFile.ts

View workflow job for this annotation

GitHub Actions / build (20.18.0)

Insert `⏎`
90 changes: 90 additions & 0 deletions src/lib/zip/zip.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { strFromU8, unzipSync } from 'fflate';
import { Body } from 'aws-sdk/clients/s3';
import { renderToStaticMarkup } from 'react-dom/server';
import { getUploadLimits } from '../misc/getUploadLimits';
import { isHTMLFile, isMarkdownFile, isPDFFile } from '../storage/checks';
import { processAndPrepareArchiveData } from './fallback/processAndPrepareArchiveData';

/**
 * A single file collected from an uploaded archive.
 */
interface File {
  /** Name of the entry as reported by the extractor. */
  name: string;
  /**
   * Entry contents. `ZipHandler` stores decoded text for HTML and Markdown
   * entries and the raw bytes for everything else.
   */
  contents?: Body | string;
}

/**
 * Collects the files contained in an uploaded zip archive, descending into
 * nested `.zip` entries.
 *
 * Archives that fflate rejects as invalid zip data are handed to the
 * tar-based fallback (`processAndPrepareArchiveData`) instead of failing.
 */
class ZipHandler {
  /** fflate's error code for "invalid zip data"; triggers the fallback. */
  private static readonly INVALID_ZIP_DATA = 13;

  /** Files collected so far, in discovery order. */
  files: File[];

  /** Number of nested zip entries processed so far. */
  zipFileCount: number;

  /** Value of `zipFileCount` at which further archives are refused. */
  maxZipFiles: number;

  /**
   * @param maxNestedZipFiles - Limit stored as `maxZipFiles`.
   */
  constructor(maxNestedZipFiles: number) {
    this.files = [];
    this.zipFileCount = 0;
    this.maxZipFiles = maxNestedZipFiles;
  }

  /**
   * Validates the upload size and extracts the archive into `files`.
   *
   * @param zipData - Raw bytes of the uploaded archive.
   * @param paying - Selects which upload limits apply.
   * @throws Error carrying an HTML message when the upload exceeds the size limit.
   * @throws Error when too many zip files are encountered.
   */
  async build(zipData: Uint8Array, paying: boolean): Promise<void> {
    const size = Buffer.byteLength(zipData);
    const limits = getUploadLimits(paying);

    if (size > limits.fileSize) {
      // NOTE(review): the `$` before the limit is rendered literally by JSX and
      // looks like a template-string leftover — confirm before changing the copy.
      throw new Error(
        renderToStaticMarkup(
          <>
            Your upload is too big, there is a max of {size} / $
            {limits.fileSize} currently.{' '}
            <a href="https://alemayhu.com/patreon">Become a patron</a> to remove
            default limit.
          </>
        )
      );
    }

    await this.processZip(zipData, paying);
  }

  /**
   * Extracts one archive level, recursing into nested `.zip` entries.
   *
   * Directory entries, `__MACOSX/` metadata and PDF files are skipped. HTML and
   * Markdown entries are stored as decoded text, all other entries as raw bytes.
   */
  private async processZip(
    zipData: Uint8Array,
    paying: boolean
  ): Promise<void> {
    if (this.zipFileCount >= this.maxZipFiles) {
      throw new Error('Too many zip files in the upload.');
    }

    try {
      const loadedZip = unzipSync(zipData, {
        // Names ending in '/' are directory entries and carry no data.
        filter: (file) => !file.name.endsWith('/'),
      });

      for (const [name, contents] of Object.entries(loadedZip)) {
        if (name.includes('__MACOSX/') || isPDFFile(name)) {
          continue;
        }

        if (name.endsWith('.zip')) {
          this.zipFileCount++;
          await this.processZip(contents, paying);
        } else if (isHTMLFile(name) || isMarkdownFile(name)) {
          this.files.push({ name, contents: strFromU8(contents) });
        } else {
          this.files.push({ name, contents });
        }
      }
    } catch (error: unknown) {
      if (!ZipHandler.isInvalidZipError(error)) {
        throw error;
      }

      // fflate could not parse the archive: let the tar-based fallback try.
      const foundFiles = await processAndPrepareArchiveData(zipData, paying);
      this.files.push(...foundFiles);
      // Log a count only; dumping `this.files` would print every file's contents.
      console.log(`zip fallback recovered ${foundFiles.length} file(s)`);
    }
  }

  /** Narrows an unknown thrown value to fflate's invalid-zip-data error. */
  private static isInvalidZipError(error: unknown): boolean {
    return (
      typeof error === 'object' &&
      error !== null &&
      (error as { code?: unknown }).code === ZipHandler.INVALID_ZIP_DATA
    );
  }

  /**
   * @returns The names of all files collected so far, in discovery order.
   */
  getFileNames(): string[] {
    return this.files.map((file) => file.name);
  }
}

export { ZipHandler, File };
Loading

0 comments on commit 8ddbd8f

Please sign in to comment.