Skip to content

Commit

Permalink
Allow to asynchronously generate PDFs (#65)
Browse files Browse the repository at this point in the history
The current implementation of `HeadlessChrome::toPdf()` always assumes
that it controls the event loop instance, i.e. `HeadlessChrome` creates
and starts the event loop manually. This may work for most use cases as
they are mostly triggered via Icinga Web, but not if you want to
generate PDFs using a daemon. Since our scheduler uses the same global
event loop instance, it is undesirable to call `Factory::create()` again
each time a PDF is generated.

refs Icinga/icingaweb2-module-reporting#229
  • Loading branch information
raviks789 authored May 7, 2024
2 parents 91529dd + 8e7ff15 commit 43c4232
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 104 deletions.
228 changes: 144 additions & 84 deletions library/Pdfexport/HeadlessChrome.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
use ipl\Html\HtmlString;
use LogicException;
use React\ChildProcess\Process;
use React\EventLoop\Factory;
use React\EventLoop\Loop;
use React\EventLoop\TimerInterface;
use React\Promise;
use React\Promise\ExtendedPromiseInterface;
use Throwable;
use WebSocket\Client;
use WebSocket\ConnectionException;

Expand Down Expand Up @@ -240,111 +243,168 @@ public function fromHtml($html, $asFile = false)
}

/**
* Export to PDF
* Generate a PDF raw string asynchronously.
*
* @return string
* @throws Exception
* @return ExtendedPromiseInterface
*/
public function toPdf()
public function asyncToPdf(): ExtendedPromiseInterface
{
switch (true) {
case $this->remote !== null:
try {
$result = $this->jsonVersion($this->remote[0], $this->remote[1]);
$parts = explode('/', $result['webSocketDebuggerUrl']);
$pdf = $this->printToPDF(
join(':', $this->remote),
end($parts),
! $this->document->isEmpty() ? $this->document->getPrintParameters() : []
);
break;
} catch (Exception $e) {
if ($this->binary === null) {
throw $e;
} else {
$deferred = new Promise\Deferred();
Loop::futureTick(function () use ($deferred) {
switch (true) {
case $this->remote !== null:
try {
$result = $this->jsonVersion($this->remote[0], $this->remote[1]);
if (is_array($result)) {
$parts = explode('/', $result['webSocketDebuggerUrl']);
$pdf = $this->printToPDF(
join(':', $this->remote),
end($parts),
! $this->document->isEmpty() ? $this->document->getPrintParameters() : []
);
break;
}
} catch (Exception $e) {
if ($this->binary == null) {
$deferred->reject($e);
return;
}

Logger::warning(
'Failed to connect to remote chrome: %s:%d (%s)',
$this->remote[0],
$this->remote[1],
$e
);
}
}

// Fallback to the local binary if a remote chrome is unavailable
case $this->binary !== null:
$browserHome = $this->getFileStorage()->resolvePath('HOME');
$commandLine = join(' ', [
escapeshellarg($this->getBinary()),
static::renderArgumentList([
'--bwsi',
'--headless',
'--disable-gpu',
'--no-sandbox',
'--no-first-run',
'--disable-dev-shm-usage',
'--remote-debugging-port=0',
'--homedir=' => $browserHome,
'--user-data-dir=' => $browserHome
])
]);

if (Platform::isLinux()) {
Logger::debug('Starting browser process: HOME=%s exec %s', $browserHome, $commandLine);
$chrome = new Process('exec ' . $commandLine, null, ['HOME' => $browserHome]);
} else {
Logger::debug('Starting browser process: %s', $commandLine);
$chrome = new Process($commandLine);
}
// Reject the promise if we didn't get the expected output from the /json/version endpoint.
if ($this->binary === null) {
$deferred->reject(
new Exception('Failed to determine remote chrome version via the /json/version endpoint.')
);
return;
}

$loop = Factory::create();
// Fallback to the local binary if a remote chrome is unavailable
case $this->binary !== null:
$browserHome = $this->getFileStorage()->resolvePath('HOME');
$commandLine = join(' ', [
escapeshellarg($this->getBinary()),
static::renderArgumentList([
'--bwsi',
'--headless',
'--disable-gpu',
'--no-sandbox',
'--no-first-run',
'--disable-dev-shm-usage',
'--remote-debugging-port=0',
'--homedir=' => $browserHome,
'--user-data-dir=' => $browserHome
])
]);

if (Platform::isLinux()) {
Logger::debug('Starting browser process: HOME=%s exec %s', $browserHome, $commandLine);
$chrome = new Process('exec ' . $commandLine, null, ['HOME' => $browserHome]);
} else {
Logger::debug('Starting browser process: %s', $commandLine);
$chrome = new Process($commandLine);
}

$killer = $loop->addTimer(10, function (TimerInterface $timer) use ($chrome) {
$chrome->terminate(6); // SIGABRT
Logger::error(
'Terminated browser process after %d seconds elapsed without the expected output',
$timer->getInterval()
);
});
$killer = Loop::addTimer(10, function (TimerInterface $timer) use ($chrome, $deferred) {
$chrome->terminate(6); // SIGABRT

$chrome->start($loop);
Logger::error(
'Browser timed out after %d seconds without the expected output',
$timer->getInterval()
);

$pdf = null;
$chrome->stderr->on('data', function ($chunk) use (&$pdf, $chrome, $loop, $killer) {
Logger::debug('Caught browser output: %s', $chunk);
$deferred->reject(
new Exception(
'Received empty response or none at all from browser.'
. ' Please check the logs for further details.'
)
);
});

$chrome->start();

$chrome->stderr->on('data', function ($chunk) use ($chrome, $deferred, $killer) {
Logger::debug('Caught browser output: %s', $chunk);

if (preg_match(self::DEBUG_ADDR_PATTERN, trim($chunk), $matches)) {
Loop::cancelTimer($killer);

try {
$pdf = $this->printToPDF(
$matches[1],
$matches[2],
! $this->document->isEmpty() ? $this->document->getPrintParameters() : []
);
} catch (Exception $e) {
Logger::error('Failed to print PDF. An error occurred: %s', $e);
}

$chrome->terminate();

if (! empty($pdf)) {
$deferred->resolve($pdf);
} else {
$deferred->reject(
new Exception(
'Received empty response or none at all from browser.'
. ' Please check the logs for further details.'
)
);
}
}
});

if (preg_match(self::DEBUG_ADDR_PATTERN, trim($chunk), $matches)) {
$loop->cancelTimer($killer);
$chrome->on('exit', function ($exitCode, $signal) use ($killer) {
Loop::cancelTimer($killer);

try {
$pdf = $this->printToPDF(
$matches[1],
$matches[2],
! $this->document->isEmpty() ? $this->document->getPrintParameters() : []
);
} catch (Exception $e) {
Logger::error('Failed to print PDF. An error occurred: %s', $e);
}
Logger::debug('Browser terminated by signal %d and exited with code %d', $signal, $exitCode);

$chrome->terminate();
}
});
// The browser has either timed out (after 10s), in which case the promise should have already been
// rejected, or it was terminated via its terminate() method, in which case the promise has also
// already been resolved/rejected. So, we don't need to resolve/reject the promise here.
});

$chrome->on('exit', function ($exitCode, $termSignal) use ($loop, $killer) {
$loop->cancelTimer($killer);
return;
}

Logger::debug('Browser terminated by signal %d and exited with code %d', $termSignal, $exitCode);
});
if (! empty($pdf)) {
$deferred->resolve($pdf);
} else {
$deferred->reject(
new Exception(
'Received empty response or none at all from browser.'
. ' Please check the logs for further details.'
)
);
}
});

$loop->run();
}
return $deferred->promise();
}

if (empty($pdf)) {
throw new Exception(
'Received empty response or none at all from browser.'
. ' Please check the logs for further details.'
);
}
/**
 * Export to PDF
 *
 * Blocking counterpart of {@see asyncToPdf()}: subscribes to the promise returned by it,
 * runs the event loop via `Loop::run()` and returns whatever the promise was fulfilled with.
 *
 * @return string The generated PDF; stays an empty string if the promise was never fulfilled
 * @throws Exception Presumably when the promise gets rejected (see the note on `done()` below)
 */
public function toPdf()
{
    $result = '';

    // No then/otherwise handlers are going to be registered on this promise, hence done() is used
    // to have unhandled exceptions propagated to the caller properly.
    $promise = $this->asyncToPdf();
    $promise->done(
        function (string $generated) use (&$result) {
            $result = $generated;
        }
    );

    // The fulfillment callback above is only expected to fire while the loop is running.
    Loop::run();

    return $result;
}
Expand Down
57 changes: 39 additions & 18 deletions library/Pdfexport/ProvidedHook/Pdfexport.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
use Icinga\Module\Pdfexport\PrintableHtmlDocument;
use iio\libmergepdf\Driver\TcpdiDriver;
use iio\libmergepdf\Merger;
use React\Promise\ExtendedPromiseInterface;

class Pdfexport extends PdfexportHook
{
Expand Down Expand Up @@ -86,43 +87,53 @@ public function htmlToPdf($html)
)
->toPdf();

$merger = new Merger(new TcpdiDriver());
$merger->addRaw($coverPagePdf);
$merger->addRaw($pdf);

$pdf = $merger->merge();
$pdf = $this->mergePdfs($coverPagePdf, $pdf);
}

return $pdf;
}

public function streamPdfFromHtml($html, $filename)
/**
* Transforms the given printable html document/string asynchronously to PDF.
*
* @param PrintableHtmlDocument|string $html
*
* @return ExtendedPromiseInterface
*/
public function asyncHtmlToPdf($html): ExtendedPromiseInterface
{
$filename = basename($filename, '.pdf') . '.pdf';

// Keep reference to the chrome object because it is using temp files which are automatically removed when
// the object is destructed
$chrome = $this->chrome();

$pdf = $chrome->fromHtml($html, static::getForceTempStorage())->toPdf();
$pdfPromise = $chrome->fromHtml($html, static::getForceTempStorage())->asyncToPdf();

if ($html instanceof PrintableHtmlDocument && ($coverPage = $html->getCoverPage()) !== null) {
$coverPagePdf = $chrome
->fromHtml(
/** @var ExtendedPromiseInterface $pdfPromise */
$pdfPromise = $pdfPromise->then(function (string $pdf) use ($chrome, $html, $coverPage) {
return $chrome->fromHtml(
(new PrintableHtmlDocument())
->add($coverPage)
->addAttributes($html->getAttributes())
->removeMargins(),
static::getForceTempStorage()
)
->toPdf();
)->asyncToPdf()->then(
function (string $coverPagePdf) use ($pdf) {
return $this->mergePdfs($coverPagePdf, $pdf);
}
);
});
}

$merger = new Merger(new TcpdiDriver());
$merger->addRaw($coverPagePdf);
$merger->addRaw($pdf);
return $pdfPromise;
}

$pdf = $merger->merge();
}
public function streamPdfFromHtml($html, $filename)
{
$filename = basename($filename, '.pdf') . '.pdf';

// Generate the PDF before changing the response headers to properly handle and display errors in the UI.
$pdf = $this->htmlToPdf($html);

/** @var Web $app */
$app = Icinga::app();
Expand Down Expand Up @@ -151,4 +162,14 @@ protected function chrome()

return $chrome;
}

/**
 * Merge the given raw PDF strings into a single one
 *
 * Each string is handed over to the merger in the order it has been passed to this method.
 *
 * @param string ...$pdfs Raw PDF contents to merge
 *
 * @return string The result of the merge
 */
protected function mergePdfs(string ...$pdfs): string
{
    $pdfMerger = new Merger(new TcpdiDriver());

    foreach ($pdfs as $rawPdf) {
        $pdfMerger->addRaw($rawPdf);
    }

    $merged = $pdfMerger->merge();

    return $merged;
}
}
4 changes: 2 additions & 2 deletions phpstan-baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ parameters:
path: library/Pdfexport/HeadlessChrome.php

-
message: "#^Cannot access offset 'webSocketDebuggerUrl' on array\\|bool\\.$#"
message: "#^Cannot call method on\\(\\) on React\\\\Stream\\\\ReadableStreamInterface\\|React\\\\Stream\\\\WritableStreamInterface\\|null\\.$#"
count: 1
path: library/Pdfexport/HeadlessChrome.php

-
message: "#^Cannot call method on\\(\\) on React\\\\Stream\\\\ReadableStreamInterface\\|React\\\\Stream\\\\WritableStreamInterface\\|null\\.$#"
message: "#^Method Icinga\\\\Module\\\\Pdfexport\\\\HeadlessChrome\\:\\:asyncToPdf\\(\\) should return React\\\\Promise\\\\ExtendedPromiseInterface but returns React\\\\Promise\\\\PromiseInterface\\.$#"
count: 1
path: library/Pdfexport/HeadlessChrome.php

Expand Down

0 comments on commit 43c4232

Please sign in to comment.