-
-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Closes: #1471
- Loading branch information
Showing
10 changed files
with
165 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import { writeFile } from 'fs/promises'; | ||
import path from 'path'; | ||
import { execFile } from 'child_process'; | ||
import Workspace from '../WorkSpace'; | ||
import { S3 } from 'aws-sdk'; | ||
|
||
/**
 * Determines how many pages a PDF has by running `pdfinfo` on it and reading
 * the `Pages:` line of its output.
 *
 * @param pdfPath - Path of the PDF file passed to `pdfinfo`.
 * @returns A promise resolving to the (strictly positive) page count.
 * @throws Rejects with `Failed to execute pdfinfo` when the process cannot be
 *   run or exits with an error (the underlying error is attached as `cause`),
 *   and with `Failed to get page count` when the output holds no positive
 *   integer page count.
 */
function getPageCount(pdfPath: string): Promise<number> {
  return new Promise((resolve, reject) => {
    // NOTE(review): pdfinfo is addressed by absolute path; confirm this matches
    // the deployment image.
    execFile('/usr/local/bin/pdfinfo', [pdfPath], (error, stdout) => {
      if (error) {
        // Keep the public message stable but retain the root cause for logs.
        reject(
          Object.assign(new Error('Failed to execute pdfinfo'), { cause: error })
        );
        return;
      }

      const pagesLine = stdout
        .split(/\r?\n/)
        .find((line) => line.startsWith('Pages:'));
      // Only accept a plain run of digits: this rejects negative values and
      // avoids parseInt silently accepting a numeric prefix of garbage.
      const digits =
        pagesLine === undefined
          ? undefined
          : /^Pages:\s+(\d+)\b/.exec(pagesLine)?.[1];
      const pageCount = Number.parseInt(digits ?? '0', 10);

      if (!Number.isSafeInteger(pageCount) || pageCount <= 0) {
        reject(new Error('Failed to get page count'));
        return;
      }

      resolve(pageCount);
    });
  });
}
|
||
/**
 * Renders a single page of a PDF to a PNG file using `pdftoppm`.
 *
 * `pdftoppm` normally appends a page-number suffix that is zero-padded to the
 * width of the document's total page count (e.g. `-03` in a 12-page file), so
 * the output name cannot be predicted from the page number alone. Passing
 * `-singlefile` makes it write exactly `<outputBase>.png`, which is the path
 * this function returns.
 *
 * @param pdfPath - Path of the source PDF.
 * @param page - 1-based page number to render.
 * @returns A promise resolving to the path of the PNG that was written.
 * @throws Rejects with `Failed to convert page <page> to PNG` when `pdftoppm`
 *   cannot be run or exits with an error (underlying error attached as
 *   `cause`).
 */
function convertPage(pdfPath: string, page: number): Promise<string> {
  return new Promise((resolve, reject) => {
    const outputBase = `${pdfPath}-page${page}`;
    const pageArg = page.toString();

    execFile(
      'pdftoppm',
      [
        '-png',
        // Write "<outputBase>.png" with no page-number digits appended.
        '-singlefile',
        '-f',
        pageArg,
        '-l',
        pageArg,
        pdfPath,
        outputBase,
      ],
      (error) => {
        if (error) {
          reject(
            Object.assign(new Error(`Failed to convert page ${page} to PNG`), {
              cause: error,
            })
          );
          return;
        }
        resolve(`${outputBase}.png`);
      }
    );
  });
}
|
||
/**
 * Builds the markup for one toggle card: the front image sits in the
 * `<summary>`, the back image (when there is one) in the `<details>` body.
 */
function renderToggleCard(front: string, back?: string): string {
  const lines = [
    '<ul class="toggle">',
    '<li>',
    '<details>',
    '<summary>',
    `<img src="${front}" />`,
    '</summary>',
  ];
  if (back !== undefined) {
    lines.push(`<img src="${back}" />`);
  }
  lines.push('</details>', '</li>', '</ul>');
  return lines.join('\n');
}

/**
 * Combines page images into an HTML document of toggle cards, pairing
 * consecutive images as front (even index) and back (odd index). Images are
 * referenced by base name only.
 *
 * When the number of images is odd, the final image is emitted as a card with
 * a front and no back rather than being dropped.
 *
 * @param imagePaths - Image paths in page order.
 * @returns The complete HTML document as a string.
 */
function combineIntoHTML(imagePaths: string[]): string {
  const cards: string[] = [];

  for (let index = 0; index < imagePaths.length; index += 2) {
    const [frontPath, backPath] = imagePaths.slice(index, index + 2);
    if (frontPath === undefined) {
      break;
    }
    cards.push(
      renderToggleCard(
        path.basename(frontPath),
        backPath === undefined ? undefined : path.basename(backPath)
      )
    );
  }

  return [
    '<!DOCTYPE html>',
    '<html>',
    '<body>',
    cards.join('\n'),
    '</body>',
    '</html>',
  ].join('\n');
}
|
||
/**
 * Writes a PDF into the workspace, renders every page to an image and returns
 * an HTML document that references those images.
 *
 * Pages are converted in small batches instead of all at once, so a large
 * document does not spawn one converter process per page simultaneously.
 * Image order in the result always follows page order.
 *
 * @param pdfBuffer - PDF contents; written to `input.pdf` inside the workspace.
 * @param workspace - Workspace whose `location` receives the PDF.
 * @param noLimits - When true, the 100-page limit is not enforced.
 * @returns The generated HTML as a buffer.
 * @throws Error `PDF exceeds maximum page limit of 100` when the limit applies
 *   and is exceeded; also propagates failures from writing the file, reading
 *   the page count, or converting a page.
 */
export async function convertPDFToImages(
  pdfBuffer: S3.Body,
  workspace: Workspace,
  noLimits = false
): Promise<Buffer> {
  // Upper bound on converter processes running at the same time.
  const maxConcurrentConversions = 4;

  const pdfPath = path.join(workspace.location, 'input.pdf');
  await writeFile(pdfPath, Buffer.from(pdfBuffer as Buffer));

  const pageCount = await getPageCount(pdfPath);
  if (!noLimits && pageCount > 100) {
    throw new Error('PDF exceeds maximum page limit of 100');
  }

  const imagePaths: string[] = [];
  for (
    let firstPage = 1;
    firstPage <= pageCount;
    firstPage += maxConcurrentConversions
  ) {
    const batchSize = Math.min(
      maxConcurrentConversions,
      pageCount - firstPage + 1
    );
    // Pages inside a batch run in parallel; batches run one after another.
    const batch = await Promise.all(
      Array.from({ length: batchSize }, (_, offset) =>
        convertPage(pdfPath, firstPage + offset)
      )
    );
    imagePaths.push(...batch);
  }

  const html = combineIntoHTML(imagePaths);
  return Buffer.from(html);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters