Skip to content

Commit

Permalink
perf: allow for more buffers to be passed between child processes
Browse files Browse the repository at this point in the history
Switching from execFile to spawn.
  • Loading branch information
aalemayhu committed Dec 24, 2024
1 parent 92fc0dd commit 09512c3
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 78 deletions.
42 changes: 29 additions & 13 deletions src/lib/anki/CardGenerator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { execFile } from 'child_process';
import { spawn } from 'child_process';
import { homedir } from 'os';
import path from 'path';

Expand Down Expand Up @@ -31,19 +31,35 @@ class CardGenerator {
];
console.log('execFile', PYTHON(), createDeckScriptPathARGS);
return new Promise((resolve, reject) => {
execFile(
PYTHON(),
createDeckScriptPathARGS,
{ cwd: this.currentDirectory },
(err, stdout) => {
if (err) {
sendError(err);
reject(err);
} else {
resolve(stdout);
}
const process = spawn(PYTHON(), createDeckScriptPathARGS, {
cwd: this.currentDirectory,
});

process.on('error', (err) => {
sendError(err);
reject(err);
});

const stdoutData: string[] = [];
process.stdout.on('data', (data) => {
stdoutData.push(data.toString());
});

const stderrData: string[] = [];
process.stderr.on('data', (data) => {
stderrData.push(data.toString());
});

process.on('close', (code) => {
if (code !== 0) {
const errorOutput = stderrData.join('').trim();
return reject(
new Error(`Python script exited with code ${code}: ${errorOutput}`)
);
}
);
const lastLine = stdoutData.join('').trim().split('\n').pop();
resolve(lastLine);
});
});
}
}
Expand Down
34 changes: 17 additions & 17 deletions src/lib/parser/PrepareDeck.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ interface PrepareDeckResult {
export async function PrepareDeck(
input: DeckParserInput
): Promise<PrepareDeckResult> {
const convertedImageFiles = [];
const convertedFiles = [];

for (const file of input.files) {
if (!file.contents) {
Expand All @@ -36,7 +36,7 @@ export async function PrepareDeck(
const convertedImageContents = await convertImageToHTML(
file.contents?.toString('base64')
);
convertedImageFiles.push({
convertedFiles.push({
name: `${file.name}.html`,
contents: convertedImageContents,
});
Expand All @@ -50,32 +50,32 @@ export async function PrepareDeck(
input.settings.vertexAIPDFQuestions
) {
file.contents = await convertPDFToHTML(file.contents.toString('base64'));
} else {
if (isPPTFile(file.name)) {
file.contents = await convertPPTToPDF(
file.name,
file.contents,
input.workspace
);
}
} else if (isPPTFile(file.name)) {
const pdContents = await convertPPTToPDF(
file.name,
file.contents,
input.workspace
);

file.contents = await convertPDFToImages({
const convertedContents = await convertPDFToImages({
name: file.name,
workspace: input.workspace,
noLimits: input.noLimits,
contents: file.contents,
contents: pdContents,
});
convertedFiles.push({
name: `${file.name}.html`,
contents: convertedContents,
});
}
}

input.files.push(...convertedImageFiles);
input.files.push(...convertedFiles);
const parser = new DeckParser(input);

if (parser.totalCardCount() === 0) {
if (convertedImageFiles.length > 0) {
const htmlFile = convertedImageFiles.find((file) =>
isHTMLFile(file.name)
);
if (convertedFiles.length > 0) {
const htmlFile = convertedFiles.find((file) => isHTMLFile(file.name));
parser.processFirstFile(htmlFile?.name ?? input.name);
} else {
const apkg = await parser.tryExperimental(input.workspace);
Expand Down
52 changes: 31 additions & 21 deletions src/lib/pdf/ConvertPPTToPDF.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { S3 } from 'aws-sdk';
import Workspace from '../parser/WorkSpace';
import path from 'path';
import fs from 'fs/promises';
import { execFile } from 'child_process';
import { spawn } from 'child_process';

export function convertPPTToPDF(
name: string,
Expand All @@ -25,30 +25,40 @@ export function convertPPTToPDF(
path.basename(normalizedName, path.extname(normalizedName)) + '.pdf'
);

execFile(
unoconvBin,
['-f', 'pdf', tempFile],
{
cwd: workspace.location,
},
async (error, stdout, stderr) => {
await fs.writeFile(
path.join(workspace.location, 'stdout.log'),
stdout
);
const unoconvProcess = spawn(unoconvBin, ['-f', 'pdf', tempFile], {
cwd: workspace.location,
});

let stdout = '';
let stderr = '';

unoconvProcess.stdout.on('data', (data) => {
stdout += data;
});

unoconvProcess.stderr.on('data', (data) => {
stderr += data;
});

unoconvProcess.on('close', async (code) => {
await fs.writeFile(
path.join(workspace.location, 'stdout.log'),
stdout
);
await fs.writeFile(
path.join(workspace.location, 'stderr.log'),
stderr
);
if (code !== 0) {
await fs.writeFile(
path.join(workspace.location, 'stderr.log'),
stderr
path.join(workspace.location, 'error.log'),
`Conversion failed with code ${code}`
);
if (error) {
await fs.writeFile(
path.join(workspace.location, 'error.log'),
error.message || 'Conversion failed'
);
}
reject(new Error(`Conversion failed with code ${code}`));
} else {
resolve(await fs.readFile(pdfFile));
}
);
});
})
.catch((err) => reject(new Error(err.message || 'File write failed')));
});
Expand Down
5 changes: 2 additions & 3 deletions src/lib/pdf/convertPDFToImages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export const PDF_EXCEEDS_MAX_PAGE_LIMIT =

export async function convertPDFToImages(
input: ConvertPDFToImagesInput
): Promise<Buffer> {
): Promise<string> {
const { contents, workspace, noLimits, name } = input;
const fileName = name
? path.basename(name).replace(/\.pptx?$/i, '.pdf')
Expand All @@ -42,6 +42,5 @@ export async function convertPDFToImages(
)
);

const html = combineIntoHTML(imagePaths, title);
return Buffer.from(html);
return combineIntoHTML(imagePaths, title);
}
44 changes: 24 additions & 20 deletions src/lib/pdf/convertPage.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { execFile } from 'child_process';
import { spawn } from 'child_process';
import os from 'os';

export function convertPage(
Expand Down Expand Up @@ -26,25 +26,29 @@ export function convertPage(
: '/usr/bin/pdftoppm';

return new Promise((resolve, reject) => {
execFile(
pdftoppmPath,
[
'-png',
'-f',
pageNumber.toString(),
'-l',
pageNumber.toString(),
pdfPath,
outputFileNameBase,
],
(error) => {
if (error) {
return reject(
new Error(`Failed to convert page ${pageNumber} to PNG`)
);
}
resolve(`${outputFileNameBase}-${paddedPageNumber}.png`);
const process = spawn(pdftoppmPath, [
'-png',
'-f',
pageNumber.toString(),
'-l',
pageNumber.toString(),
pdfPath,
outputFileNameBase,
]);

process.on('error', (error) => {
reject(
new Error(
`Failed to convert page ${pageNumber} to PNG: ${error.message}`
)
);
});

process.on('close', (code) => {
if (code !== 0) {
return reject(new Error(`pdftoppm process exited with code ${code}`));
}
);
resolve(`${outputFileNameBase}-${paddedPageNumber}.png`);
});
});
}
36 changes: 32 additions & 4 deletions src/lib/pdf/getPageCount.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,41 @@
import { execFile } from 'child_process';
import { spawn } from 'child_process';
import path from 'path';
import fs from 'fs/promises';

export function getPageCount(pdfPath: string): Promise<number> {
return new Promise((resolve, reject) => {
const pdfinfoBin =
process.platform === 'darwin'
? '/usr/local/bin/pdfinfo'
: '/usr/bin/pdfinfo';
execFile(pdfinfoBin, [pdfPath], (error, stdout) => {
if (error) {

const pdfinfoProcess = spawn(pdfinfoBin, [pdfPath]);

let stdout = '';
let stderr = '';

pdfinfoProcess.stdout.on('data', (data) => {
stdout += data;
});

pdfinfoProcess.stderr.on('data', (data) => {
stderr += data;
});

pdfinfoProcess.on('close', async (code) => {
const pdfDir = path.dirname(pdfPath);
const pdfBaseName = path.basename(pdfPath, path.extname(pdfPath));

await fs.writeFile(
path.join(pdfDir, `${pdfBaseName}_stdout.log`),
stdout
);
await fs.writeFile(
path.join(pdfDir, `${pdfBaseName}_stderr.log`),
stderr
);

if (code !== 0) {
reject(new Error('Failed to execute pdfinfo'));
return;
}
Expand All @@ -16,7 +44,7 @@ export function getPageCount(pdfPath: string): Promise<number> {
stdout
.split('\n')
.find((line) => line.startsWith('Pages:'))
?.split(/\s+/)[1] || '0'
?.split(/\s+/)[1] ?? '0'
);

if (!pageCount) {
Expand Down

0 comments on commit 09512c3

Please sign in to comment.