Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for uploading PPT(x) files #1666

Merged
merged 3 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions src/lib/misc/isLimitError.test.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import { PDF_EXCEEDS_MAX_PAGE_LIMIT } from '../parser/pdf/convertPDFToImages';
import { PDF_EXCEEDS_MAX_PAGE_LIMIT } from '../pdf/convertPDFToImages';
import { isLimitError } from './isLimitError';

const MOCK_MSG = "<div class=\"content\"><h3 class=\"title is-3\">Your request has hit the limit</h3><ul><li>Split your request into multiple smaller ones (i.e.) make your upload size smaller.</li><li><div class=\"is-flex is-align-items-center\"><a class=\"button is-success is-medium mr-2\" href=\"https://buy.stripe.com/eVadTGcCI6Ny73qfZ0\">Subscribe</a> for only $2 per month to remove all the limits.</div></li><li>Or <a href=\"https://alemayhu.com/patreon\">Become a patron</a> to support me.</li></ul><p>If you already have an account, please <a href=\"/login?redirect=/upload\">login</a> and try again. If you are still experiencing issues, please contact <a href=\"mailto:[email protected]\">[email protected]</a>.</p></div>";
const MOCK_MSG =
'<div class="content"><h3 class="title is-3">Your request has hit the limit</h3><ul><li>Split your request into multiple smaller ones (i.e.) make your upload size smaller.</li><li><div class="is-flex is-align-items-center"><a class="button is-success is-medium mr-2" href="https://buy.stripe.com/eVadTGcCI6Ny73qfZ0">Subscribe</a> for only $2 per month to remove all the limits.</div></li><li>Or <a href="https://alemayhu.com/patreon">Become a patron</a> to support me.</li></ul><p>If you already have an account, please <a href="/login?redirect=/upload">login</a> and try again. If you are still experiencing issues, please contact <a href="mailto:[email protected]">[email protected]</a>.</p></div>';

describe('isLimitError', () => {
it('returns true ', () => {
expect(isLimitError(new Error("File too large"))).toBe(true);
expect(isLimitError(new Error("You can only add 100 cards"))).toBe(true);
expect(isLimitError(new Error('File too large'))).toBe(true);
expect(isLimitError(new Error('You can only add 100 cards'))).toBe(true);
expect(isLimitError(new Error(PDF_EXCEEDS_MAX_PAGE_LIMIT))).toBe(true);
});

it('returns true for html', () => {
expect(isLimitError(new Error(MOCK_MSG))).toBe(true);
})
});

it("returns false", () => {
expect(isLimitError(new Error("File too small"))).toBe(false);
it('returns false', () => {
expect(isLimitError(new Error('File too small'))).toBe(false);
expect(isLimitError()).toBe(false);
})
});
});
2 changes: 1 addition & 1 deletion src/lib/misc/isLimitError.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { PDF_EXCEEDS_MAX_PAGE_LIMIT } from '../parser/pdf/convertPDFToImages';
import { PDF_EXCEEDS_MAX_PAGE_LIMIT } from '../pdf/convertPDFToImages';

const LIMIT_MESSAGES = [
'File too large',
Expand Down
22 changes: 18 additions & 4 deletions src/lib/parser/PrepareDeck.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import getDeckFilename from '../anki/getDeckFilename';
import { DeckParser, DeckParserInput } from './DeckParser';
import Deck from './Deck';
import { isPDFFile } from '../storage/checks';
import { isPDFFile, isPPTFile } from '../storage/checks';
import { convertPDFToHTML } from './experimental/VertexAPI/convertPDFToHTML';
import { convertPDFToImages } from './pdf/convertPDFToImages';
import { convertPDFToImages } from '../pdf/convertPDFToImages';
import { convertPPTToPDF } from '../pdf/ConvertPPTToPDF';

interface PrepareDeckResult {
name: string;
Expand All @@ -15,11 +16,24 @@ export async function PrepareDeck(
input: DeckParserInput
): Promise<PrepareDeckResult> {
for (const file of input.files) {
if (!isPDFFile(file.name) || !file.contents) continue;
if ((!isPDFFile(file.name) && !isPPTFile(file.name)) || !file.contents)
continue;

if (input.noLimits && input.settings.vertexAIPDFQuestions) {
if (
isPDFFile(file.name) &&
input.noLimits &&
input.settings.vertexAIPDFQuestions
) {
file.contents = await convertPDFToHTML(file.contents.toString('base64'));
} else {
if (isPPTFile(file.name)) {
file.contents = await convertPPTToPDF(
file.name,
file.contents,
input.workspace
);
}

file.contents = await convertPDFToImages({
name: file.name,
workspace: input.workspace,
Expand Down
135 changes: 0 additions & 135 deletions src/lib/parser/pdf/convertPDFToImages.ts

This file was deleted.

50 changes: 50 additions & 0 deletions src/lib/pdf/ConvertPPTToPDF.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { S3 } from 'aws-sdk';
import Workspace from '../parser/WorkSpace';
import path from 'path';
import fs from 'fs/promises';
import { execFile } from 'child_process';

/**
 * Converts a PowerPoint file to PDF by shelling out to LibreOffice (`soffice`).
 *
 * The presentation is written into the workspace directory, `soffice
 * --headless --convert-to pdf` is run with the workspace as its working
 * directory, and the resulting `<basename>.pdf` is read back from the
 * workspace. The converter's stdout and stderr are saved to `stdout.log` and
 * `stderr.log` in the workspace on a best-effort basis.
 *
 * @param name - File name of the presentation; joined onto the workspace location.
 * @param contents - Raw bytes of the presentation.
 * @param workspace - Workspace whose `location` is used for all files.
 * @returns The bytes of the generated PDF.
 * @throws Error if the input file cannot be written, if `soffice` fails, or if
 *   the generated PDF cannot be read.
 */
export async function convertPPTToPDF(
  name: string,
  contents: S3.Body,
  workspace: Workspace
): Promise<Buffer> {
  const sofficeBin =
    process.platform === 'darwin'
      ? '/Applications/LibreOffice.app/Contents/MacOS/soffice'
      : '/usr/bin/soffice';

  // NOTE(review): `name` is joined as-is; a name containing path separators
  // would target a sub-path of the workspace — confirm callers pass bare names.
  const tempFile = path.join(workspace.location, name);
  const pdfFile = path.join(
    workspace.location,
    path.basename(name, path.extname(name)) + '.pdf'
  );
  const payload = Buffer.from(contents as Buffer);

  try {
    await fs.writeFile(tempFile, payload);
  } catch (err) {
    const reason = err instanceof Error ? err.message : '';
    throw new Error(reason || 'File write failed');
  }

  // Wrap the callback API so the outcome is handled with await below; the
  // wrapper itself never rejects on a conversion error, it just reports it.
  const { error, stdout, stderr } = await new Promise<{
    error: Error | null;
    stdout: string;
    stderr: string;
  }>((settle) => {
    execFile(
      sofficeBin,
      ['--headless', '--convert-to', 'pdf', tempFile],
      {
        cwd: workspace.location,
      },
      (execError, execStdout, execStderr) =>
        settle({ error: execError, stdout: execStdout, stderr: execStderr })
    );
  });

  // The logs are diagnostics only: a failure to write them must not mask the
  // conversion result, so their outcome is deliberately not inspected.
  await Promise.allSettled([
    fs.writeFile(path.join(workspace.location, 'stdout.log'), stdout),
    fs.writeFile(path.join(workspace.location, 'stderr.log'), stderr),
  ]);

  if (error) {
    // Stop here: there is no PDF to read when the converter failed.
    throw new Error(error.message || 'Conversion failed');
  }

  return await fs.readFile(pdfFile);
}
26 changes: 26 additions & 0 deletions src/lib/pdf/combineIntoHTML.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import path from 'path';

/** Escapes characters that are significant in HTML text and attribute values. */
function escapeHTML(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Builds an HTML document of toggle cards from a list of page images.
 *
 * Images are consumed in consecutive pairs: the first of each pair goes inside
 * the `<summary>` (front), the second inside the `<details>` body (back). Only
 * the base name of each path is referenced. A trailing unpaired image is not
 * included in the output.
 *
 * @param imagePaths - Image file paths in page order.
 * @param title - Document title; HTML-escaped before being embedded.
 * @returns The complete HTML document as a string.
 */
export function combineIntoHTML(imagePaths: string[], title: string): string {
  // Whole pairs only; an odd trailing image has no back side.
  const pairCount = Math.floor(imagePaths.length / 2);

  const cards = Array.from({ length: pairCount }, (_, i) => {
    // File names come from uploads, so escape them for the attribute context.
    const front = escapeHTML(path.basename(imagePaths[i * 2]));
    const back = escapeHTML(path.basename(imagePaths[i * 2 + 1]));
    return [
      '<ul class="toggle">',
      '<li>',
      '<details>',
      '<summary>',
      `<img src="${front}" />`,
      '</summary>',
      `<img src="${back}" />`,
      '</details>',
      '</li>',
      '</ul>',
    ].join('\n');
  });

  return [
    '<!DOCTYPE html>',
    '<html>',
    `<head><title>${escapeHTML(title)}</title></head>`,
    '<body>',
    cards.join('\n'),
    '</body>',
    '</html>',
  ].join('\n');
}
40 changes: 40 additions & 0 deletions src/lib/pdf/convertPDFToImages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { writeFile } from 'fs/promises';
import path from 'path';
import Workspace from '../parser/WorkSpace';
import { S3 } from 'aws-sdk';
import { getPageCount } from './getPageCount';
import { convertPage } from './convertPage';
import { combineIntoHTML } from './combineIntoHTML';

/** Input accepted by `convertPDFToImages`. */
interface ConvertPDFToImagesInput {
/** Workspace whose `location` directory receives the PDF file. */
workspace: Workspace;
/** When true, the 100-page cap is not enforced. */
noLimits: boolean;
/** PDF bytes; written to disk before the page count is read. */
contents?: S3.Body;
/** File name used inside the workspace; `'Default.pdf'` is used when omitted. */
name?: string;
}

/**
 * Message of the error thrown by `convertPDFToImages` when a PDF has more
 * than 100 pages and `noLimits` is false.
 */
export const PDF_EXCEEDS_MAX_PAGE_LIMIT =
'PDF exceeds maximum page limit of 100 for free and anonymous users.';

/**
 * Writes the given PDF into the workspace, renders every page to an image and
 * returns an HTML document that references those images.
 *
 * @param input - Workspace, PDF bytes, optional file name and limit flag.
 * @returns The generated HTML as a Buffer.
 * @throws Error with `PDF_EXCEEDS_MAX_PAGE_LIMIT` when the PDF has more than
 *   100 pages and `noLimits` is false.
 */
export async function convertPDFToImages(
  input: ConvertPDFToImagesInput
): Promise<Buffer> {
  const targetPath = path.join(
    input.workspace.location,
    input.name ?? 'Default.pdf'
  );
  await writeFile(targetPath, Buffer.from(input.contents as Buffer));

  const totalPages = await getPageCount(targetPath);
  const overFreeLimit = totalPages > 100 && !input.noLimits;
  if (overFreeLimit) {
    throw new Error(PDF_EXCEEDS_MAX_PAGE_LIMIT);
  }

  // Start every page conversion up front, then wait for all of them.
  const pending: Promise<string>[] = [];
  for (let page = 1; page <= totalPages; page++) {
    pending.push(convertPage(targetPath, page, totalPages));
  }
  const renderedPages = await Promise.all(pending);

  const documentTitle = path.basename(targetPath);
  return Buffer.from(combineIntoHTML(renderedPages, documentTitle));
}
50 changes: 50 additions & 0 deletions src/lib/pdf/convertPage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { execFile } from 'child_process';
import os from 'os';

/**
 * Renders a single page of a PDF to a PNG by running `pdftoppm`.
 *
 * The output is written next to the PDF using the prefix
 * `<pdfPath>-page<pageNumber>`. The returned path appends the page number,
 * zero-padded to the number of digits in `totalPageCount`, which is the name
 * this code expects `pdftoppm` to produce.
 *
 * @param pdfPath - Path of the PDF on disk.
 * @param pageNumber - 1-based page to render.
 * @param totalPageCount - Page total of the document; determines the padding width.
 * @returns Path of the PNG expected to be produced for the page.
 * @throws Error (as a rejection) when `pdftoppm` fails.
 */
export function convertPage(
  pdfPath: string,
  pageNumber: number,
  totalPageCount: number
): Promise<string> {
  const outputFileNameBase = `${pdfPath}-page${pageNumber}`;

  // Padding width is the digit count of the page total. This matches the
  // previous 1–4 digit table and also covers documents with 10,000+ pages.
  const paddingLength =
    Number.isFinite(totalPageCount) && totalPageCount >= 1
      ? String(Math.floor(totalPageCount)).length
      : 1;
  const paddedPageNumber = String(pageNumber).padStart(paddingLength, '0');

  const pdftoppmPath =
    os.platform() === 'darwin'
      ? '/usr/local/bin/pdftoppm'
      : '/usr/bin/pdftoppm';

  return new Promise((resolve, reject) => {
    execFile(
      pdftoppmPath,
      [
        '-png',
        // First and last page are the same, so exactly one page is rendered.
        '-f',
        pageNumber.toString(),
        '-l',
        pageNumber.toString(),
        pdfPath,
        outputFileNameBase,
      ],
      (error) => {
        if (error) {
          return reject(
            new Error(`Failed to convert page ${pageNumber} to PNG`)
          );
        }
        resolve(`${outputFileNameBase}-${paddedPageNumber}.png`);
      }
    );
  });
}
Loading
Loading