From f0e63e7a6164570d6469a4e9c670737fb32f7acf Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Tue, 24 Dec 2024 07:05:44 +0100 Subject: [PATCH] perf: allow for more buffers to be passed between child processes Switching from exec to spawn. --- src/lib/anki/CardGenerator.ts | 42 +++++++++++++++++-------- src/lib/parser/PrepareDeck.ts | 34 ++++++++++---------- src/lib/pdf/ConvertPPTToPDF.ts | 52 ++++++++++++++++++------------- src/lib/pdf/convertPDFToImages.ts | 6 ++-- src/lib/pdf/convertPage.ts | 44 ++++++++++++++------------ src/lib/pdf/getPageCount.ts | 34 ++++++++++++++++++-- 6 files changed, 135 insertions(+), 77 deletions(-) diff --git a/src/lib/anki/CardGenerator.ts b/src/lib/anki/CardGenerator.ts index e75dbbf14..e848b7507 100644 --- a/src/lib/anki/CardGenerator.ts +++ b/src/lib/anki/CardGenerator.ts @@ -1,4 +1,4 @@ -import { execFile } from 'child_process'; +import { spawn } from 'child_process'; import { homedir } from 'os'; import path from 'path'; @@ -31,19 +31,35 @@ class CardGenerator { ]; console.log('execFile', PYTHON(), createDeckScriptPathARGS); return new Promise((resolve, reject) => { - execFile( - PYTHON(), - createDeckScriptPathARGS, - { cwd: this.currentDirectory }, - (err, stdout) => { - if (err) { - sendError(err); - reject(err); - } else { - resolve(stdout); - } + const process = spawn(PYTHON(), createDeckScriptPathARGS, { + cwd: this.currentDirectory, + }); + + process.on('error', (err) => { + sendError(err); + reject(err); + }); + + const stdoutData: string[] = []; + process.stdout.on('data', (data) => { + stdoutData.push(data.toString()); + }); + + const stderrData: string[] = []; + process.stderr.on('data', (data) => { + stderrData.push(data.toString()); + }); + + process.on('close', (code) => { + if (code !== 0) { + const errorOutput = stderrData.join('').trim(); + return reject( + new Error(`Python script exited with code ${code}: ${errorOutput}`) + ); } - ); + const lastLine = stdoutData.join('').trim().split('\n').pop(); 
resolve(lastLine); + }); }); } } diff --git a/src/lib/parser/PrepareDeck.ts b/src/lib/parser/PrepareDeck.ts index a1f4a7da0..a704ba879 100644 --- a/src/lib/parser/PrepareDeck.ts +++ b/src/lib/parser/PrepareDeck.ts @@ -21,7 +21,7 @@ interface PrepareDeckResult { export async function PrepareDeck( input: DeckParserInput ): Promise { - const convertedImageFiles = []; + const convertedFiles = []; for (const file of input.files) { if (!file.contents) { @@ -36,7 +36,7 @@ export async function PrepareDeck( const convertedImageContents = await convertImageToHTML( file.contents?.toString('base64') ); - convertedImageFiles.push({ + convertedFiles.push({ name: `${file.name}.html`, contents: convertedImageContents, }); @@ -50,32 +50,32 @@ export async function PrepareDeck( input.settings.vertexAIPDFQuestions ) { file.contents = await convertPDFToHTML(file.contents.toString('base64')); - } else { - if (isPPTFile(file.name)) { - file.contents = await convertPPTToPDF( - file.name, - file.contents, - input.workspace - ); - } + } else if (isPPTFile(file.name)) { + const pdContents = await convertPPTToPDF( + file.name, + file.contents, + input.workspace + ); - file.contents = await convertPDFToImages({ + const convertedContents = await convertPDFToImages({ name: file.name, workspace: input.workspace, noLimits: input.noLimits, - contents: file.contents, + contents: pdContents, + }); + convertedFiles.push({ + name: `${file.name}.html`, + contents: convertedContents, }); } } - input.files.push(...convertedImageFiles); + input.files.push(...convertedFiles); const parser = new DeckParser(input); if (parser.totalCardCount() === 0) { - if (convertedImageFiles.length > 0) { - const htmlFile = convertedImageFiles.find((file) => - isHTMLFile(file.name) - ); + if (convertedFiles.length > 0) { + const htmlFile = convertedFiles.find((file) => isHTMLFile(file.name)); parser.processFirstFile(htmlFile?.name ?? 
input.name); } else { const apkg = await parser.tryExperimental(input.workspace); diff --git a/src/lib/pdf/ConvertPPTToPDF.ts b/src/lib/pdf/ConvertPPTToPDF.ts index 89bdfa2ec..75f5000d9 100644 --- a/src/lib/pdf/ConvertPPTToPDF.ts +++ b/src/lib/pdf/ConvertPPTToPDF.ts @@ -2,7 +2,7 @@ import { S3 } from 'aws-sdk'; import Workspace from '../parser/WorkSpace'; import path from 'path'; import fs from 'fs/promises'; -import { execFile } from 'child_process'; +import { spawn } from 'child_process'; export function convertPPTToPDF( name: string, @@ -25,30 +25,40 @@ export function convertPPTToPDF( path.basename(normalizedName, path.extname(normalizedName)) + '.pdf' ); - execFile( - unoconvBin, - ['-f', 'pdf', tempFile], - { - cwd: workspace.location, - }, - async (error, stdout, stderr) => { - await fs.writeFile( - path.join(workspace.location, 'stdout.log'), - stdout - ); + const unoconvProcess = spawn(unoconvBin, ['-f', 'pdf', tempFile], { + cwd: workspace.location, + }); + + let stdout = ''; + let stderr = ''; + + unoconvProcess.stdout.on('data', (data) => { + stdout += data; + }); + + unoconvProcess.stderr.on('data', (data) => { + stderr += data; + }); + + unoconvProcess.on('close', async (code) => { + await fs.writeFile( + path.join(workspace.location, 'stdout.log'), + stdout + ); + await fs.writeFile( + path.join(workspace.location, 'stderr.log'), + stderr + ); + if (code !== 0) { await fs.writeFile( - path.join(workspace.location, 'stderr.log'), - stderr + path.join(workspace.location, 'error.log'), + `Conversion failed with code ${code}` ); - if (error) { - await fs.writeFile( - path.join(workspace.location, 'error.log'), - error.message || 'Conversion failed' - ); - } + reject(new Error(`Conversion failed with code ${code}`)); + } else { resolve(await fs.readFile(pdfFile)); } - ); + }); }) .catch((err) => reject(new Error(err.message || 'File write failed'))); }); diff --git a/src/lib/pdf/convertPDFToImages.ts b/src/lib/pdf/convertPDFToImages.ts index 
bee02daf0..1dc0aa590 100644 --- a/src/lib/pdf/convertPDFToImages.ts +++ b/src/lib/pdf/convertPDFToImages.ts @@ -1,3 +1,4 @@ +import fs from 'fs'; import { writeFile } from 'fs/promises'; import path from 'path'; import Workspace from '../parser/WorkSpace'; @@ -19,7 +20,7 @@ export const PDF_EXCEEDS_MAX_PAGE_LIMIT = export async function convertPDFToImages( input: ConvertPDFToImagesInput -): Promise { +): Promise { const { contents, workspace, noLimits, name } = input; const fileName = name ? path.basename(name).replace(/\.pptx?$/i, '.pdf') @@ -42,6 +43,5 @@ export async function convertPDFToImages( ) ); - const html = combineIntoHTML(imagePaths, title); - return Buffer.from(html); + return combineIntoHTML(imagePaths, title); } diff --git a/src/lib/pdf/convertPage.ts b/src/lib/pdf/convertPage.ts index 0e1b1ac84..3a5ac6139 100644 --- a/src/lib/pdf/convertPage.ts +++ b/src/lib/pdf/convertPage.ts @@ -1,4 +1,4 @@ -import { execFile } from 'child_process'; +import { spawn } from 'child_process'; import os from 'os'; export function convertPage( @@ -26,25 +26,29 @@ export function convertPage( : '/usr/bin/pdftoppm'; return new Promise((resolve, reject) => { - execFile( - pdftoppmPath, - [ - '-png', - '-f', - pageNumber.toString(), - '-l', - pageNumber.toString(), - pdfPath, - outputFileNameBase, - ], - (error) => { - if (error) { - return reject( - new Error(`Failed to convert page ${pageNumber} to PNG`) - ); - } - resolve(`${outputFileNameBase}-${paddedPageNumber}.png`); + const process = spawn(pdftoppmPath, [ + '-png', + '-f', + pageNumber.toString(), + '-l', + pageNumber.toString(), + pdfPath, + outputFileNameBase, + ]); + + process.on('error', (error) => { + reject( + new Error( + `Failed to convert page ${pageNumber} to PNG: ${error.message}` + ) + ); + }); + + process.on('close', (code) => { + if (code !== 0) { + return reject(new Error(`pdftoppm process exited with code ${code}`)); } - ); + resolve(`${outputFileNameBase}-${paddedPageNumber}.png`); + }); }); } diff 
--git a/src/lib/pdf/getPageCount.ts b/src/lib/pdf/getPageCount.ts index 67cab91fc..0ec1ff495 100644 --- a/src/lib/pdf/getPageCount.ts +++ b/src/lib/pdf/getPageCount.ts @@ -1,4 +1,6 @@ -import { execFile } from 'child_process'; +import { spawn } from 'child_process'; +import path from 'path'; +import fs from 'fs/promises'; export function getPageCount(pdfPath: string): Promise { return new Promise((resolve, reject) => { @@ -6,8 +8,34 @@ export function getPageCount(pdfPath: string): Promise { process.platform === 'darwin' ? '/usr/local/bin/pdfinfo' : '/usr/bin/pdfinfo'; - execFile(pdfinfoBin, [pdfPath], (error, stdout) => { - if (error) { + + const pdfinfoProcess = spawn(pdfinfoBin, [pdfPath]); + + let stdout = ''; + let stderr = ''; + + pdfinfoProcess.stdout.on('data', (data) => { + stdout += data; + }); + + pdfinfoProcess.stderr.on('data', (data) => { + stderr += data; + }); + + pdfinfoProcess.on('close', async (code) => { + const pdfDir = path.dirname(pdfPath); + const pdfBaseName = path.basename(pdfPath, path.extname(pdfPath)); + + await fs.writeFile( + path.join(pdfDir, `${pdfBaseName}_stdout.log`), + stdout + ); + await fs.writeFile( + path.join(pdfDir, `${pdfBaseName}_stderr.log`), + stderr + ); + + if (code !== 0) { reject(new Error('Failed to execute pdfinfo')); return; }