From 5f6682e6e75eb3e096ef5876a7c4cf91a3469145 Mon Sep 17 00:00:00 2001 From: Andrew Bromwich Date: Thu, 10 Oct 2024 21:10:00 +1000 Subject: [PATCH] Add raise_on_js_error option --- README.md | 3 ++ lib/grover/errors.rb | 12 +++++ lib/grover/js/processor.cjs | 69 +++++++++++++++++++-------- lib/grover/options_fixer.rb | 3 +- lib/grover/processor.rb | 4 +- lib/grover/utils.rb | 3 +- spec/grover/configuration_spec.rb | 72 ++++++++++++++++++++++++++++ spec/grover/processor_spec.rb | 78 +++++++++++++++++++++++++++---- spec/grover/utils_spec.rb | 25 ++++++++-- 9 files changed, 231 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 37c85a0..4fae14f 100644 --- a/README.md +++ b/README.md @@ -162,6 +162,9 @@ The `wait_for_timeout` option can also be used to wait the specified number of m The `raise_on_request_failure` option, when enabled, will raise a `Grover::JavaScript::RequestFailedError` if the initial content request or any subsequent asset request returns a bad response or times out. +The `raise_on_js_error` option, when enabled, will raise a `Grover::JavaScript::PageRenderError` if any uncaught +Javascript errors occur when trying to render the page. + The Chrome/Chromium executable path can be overridden with the `executable_path` option. Supplementary JavaScript can be executed on the page (after render and before conversion to PDF/image) diff --git a/lib/grover/errors.rb b/lib/grover/errors.rb index fe999a3..a225554 100644 --- a/lib/grover/errors.rb +++ b/lib/grover/errors.rb @@ -11,6 +11,18 @@ class Grover module JavaScript # rubocop:disable Style/Documentation Error = Class.new(::Grover::Error) UnknownError = Class.new(Error) + + ErrorWithDetails = Class.new(Error) do + def initialize(name, error_details) + super(name) + @error_details = Grover::Utils.deep_transform_keys_in_object error_details, &:to_sym + end + + attr_reader :error_details + end + RequestFailedError = Class.new(ErrorWithDetails) + PageRenderError = Class.new(ErrorWithDetails) + def self.const_missing(name) const_set name, Class.new(Error) end diff --git a/lib/grover/js/processor.cjs b/lib/grover/js/processor.cjs index dfe389c..f00bc52 100644 --- a/lib/grover/js/processor.cjs +++ b/lib/grover/js/processor.cjs @@ -26,8 +26,32 @@ const fs = require('fs'); const os = require('os'); const path = require('path'); +function GroverError(name, errors) { + this.name = name; + this.message = errors.map(e => e.message).join("\n"); + this.errors = errors; +} +GroverError.prototype = Error.prototype; + const _processPage = (async (convertAction, uriOrHtml, options) => { - let browser, page, errors = [], tmpDir, wsConnection = false; + let browser, page, tmpDir, wsConnection = false; + const requestErrors = [], pageErrors = []; + + const captureRequestError = (request) => { + const requestError = { url: request.url() }; + + if (request.failure()) { + requestError.reason = request.failure().errorText; + requestError.message = requestError.reason + " at " + requestError.url; + } else if (request.response() && request.response().status()) { + requestError.status = request.response().status(); + requestError.message = requestError.status + " " + requestError.url; + } else { + requestError.message = "UnknownError " + requestError.url; + } + + requestErrors.push(requestError); + }; try { // Configure puppeteer debugging options @@ -163,12 +187,24 @@ const _processPage = (async (convertAction, uriOrHtml, options) => { const raiseOnRequestFailure = options.raiseOnRequestFailure; delete options.raiseOnRequestFailure; if (raiseOnRequestFailure) { page.on('requestfinished', (request) => { - if (request.response() && !(request.response().ok() || request.response().status() === 304) && !request.redirectChain().length > 0) { - errors.push(request); + if (request.response() && + !(request.response().ok() || request.response().status() === 304) && + !request.redirectChain().length > 0) { + captureRequestError(request); } }); page.on('requestfailed', (request) => { - errors.push(request); + captureRequestError(request); + }); + } + + const raiseOnJSError = options.raiseOnJSError; delete options.raiseOnJSError; + if (raiseOnJSError) { + page.on('pageerror', (error) => { + pageErrors.push({ + message: error.toString().replace(new RegExp('^' + error.name + ': '), ''), + type: error.name || 'Error' + }); }); } @@ -257,21 +293,12 @@ const _processPage = (async (convertAction, uriOrHtml, options) => { await page.hover(hoverSelector); } - if (errors.length > 0) { - function RequestFailedError(errors) { - this.name = "RequestFailedError"; - this.message = errors.map(e => { - if (e.failure()) { - return e.failure().errorText + " at " + e.url(); - } else if (e.response() && e.response().status()) { - return e.response().status() + " " + e.url(); - } else { - return "UnknownError " + e.url() - } - }).join("\n"); - } - RequestFailedError.prototype = Error.prototype; - throw new RequestFailedError(errors); + if (requestErrors.length > 0) { + throw new GroverError("RequestFailedError", requestErrors); + } + + if (pageErrors.length > 0) { + throw new GroverError("PageRenderError", pageErrors); } // Setup conversion timeout @@ -301,7 +328,9 @@ const _processPage = (async (convertAction, uriOrHtml, options) => { }); function _handleError(error) { - if (error instanceof Error) { + if (error instanceof GroverError) { + process.stdout.write(JSON.stringify(['err', error.message, error.name, error.errors])); + } else if (error instanceof Error) { process.stdout.write( JSON.stringify(['err', error.toString().replace(new RegExp('^' + error.name + ': '), ''), error.name]) ); diff --git a/lib/grover/options_fixer.rb b/lib/grover/options_fixer.rb index 21eb757..20f70ce 100644 --- a/lib/grover/options_fixer.rb +++ b/lib/grover/options_fixer.rb @@ -34,7 +34,8 @@ def fix_options!(*option_paths) def fix_boolean_options! fix_options!( 'display_header_footer', 'full_page', 'landscape', 'omit_background', 'prefer_css_page_size', - 'print_background', 'viewport.has_touch', 'viewport.is_landscape', 'viewport.is_mobile', 'bypass_csp' + 'print_background', 'viewport.has_touch', 'viewport.is_landscape', 'viewport.is_mobile', 'bypass_csp', + 'raise_on_request_failure', 'raise_on_js_error' ) { |value| !FALSE_VALUES.include?(value) } end diff --git a/lib/grover/processor.rb b/lib/grover/processor.rb index 2a95a0a..839d07d 100644 --- a/lib/grover/processor.rb +++ b/lib/grover/processor.rb @@ -83,14 +83,14 @@ def call_js_method(method, url_or_html, options) # rubocop:disable Metrics/AbcSi input = stdout.gets raise Errno::EPIPE, "Can't read from worker" if input.nil? - status, message, error_class = JSON.parse(input) + status, message, error_class, errors = JSON.parse(input) if status == 'ok' message elsif error_class.nil? raise Grover::JavaScript::UnknownError, message else - raise Grover::JavaScript.const_get(error_class, false), message + raise Grover::JavaScript.const_get(error_class, false).new(*[message, errors].compact) end rescue JSON::ParserError raise Grover::Error, 'Malformed worker response' diff --git a/lib/grover/utils.rb b/lib/grover/utils.rb index ba306c1..317b1e1 100644 --- a/lib/grover/utils.rb +++ b/lib/grover/utils.rb @@ -8,7 +8,8 @@ class Utils ACRONYMS = { 'css' => 'CSS', 'csp' => 'CSP', - 'http' => 'HTTP' + 'http' => 'HTTP', + 'js' => 'JS' }.freeze private_constant :ACRONYMS diff --git a/spec/grover/configuration_spec.rb b/spec/grover/configuration_spec.rb index 6057fec..45b15ff 100644 --- a/spec/grover/configuration_spec.rb +++ b/spec/grover/configuration_spec.rb @@ -37,4 +37,76 @@ it { is_expected.to eq 'https://my.domain' } end end + + describe '#ignore_path' do + subject(:ignore_path) { configuration.ignore_path } + + it { is_expected.to be_nil } + + context 'when configured differently' do + before { configuration.ignore_path = '/foo/bar' } + + it { is_expected.to eq '/foo/bar' } + end + end + + describe '#use_pdf_middleware' do + subject(:use_pdf_middleware) { configuration.use_pdf_middleware } + + it { is_expected.to be true } + + context 'when configured differently' do + before { configuration.use_pdf_middleware = false } + + it { is_expected.to be false } + end + end + + describe '#use_png_middleware' do + subject(:use_png_middleware) { configuration.use_png_middleware } + + it { is_expected.to be false } + + context 'when configured differently' do + before { configuration.use_png_middleware = true } + + it { is_expected.to be true } + end + end + + describe '#use_jpeg_middleware' do + subject(:use_jpeg_middleware) { configuration.use_jpeg_middleware } + + it { is_expected.to be false } + + context 'when configured differently' do + before { configuration.use_jpeg_middleware = true } + + it { is_expected.to be true } + end + end + + describe '#node_env_vars' do + subject(:node_env_vars) { configuration.node_env_vars } + + it { is_expected.to eq({}) } + + context 'when configured differently' do + before { configuration.node_env_vars = { 'LD_PRELOAD' => '' } } + + it { is_expected.to eq({ 'LD_PRELOAD' => '' }) } + end + end + + describe '#allow_file_uris' do + subject(:allow_file_uris) { configuration.allow_file_uris } + + it { is_expected.to be false } + + context 'when configured differently' do + before { configuration.allow_file_uris = true } + + it { is_expected.to be true } + end + end end diff --git a/spec/grover/processor_spec.rb b/spec/grover/processor_spec.rb index ac02d55..8ea728a 100644 --- a/spec/grover/processor_spec.rb +++ b/spec/grover/processor_spec.rb @@ -697,10 +697,15 @@ end it do - expect do - convert - end.to raise_error Grover::JavaScript::RequestFailedError, - "net::ERR_NAME_NOT_RESOLVED at #{protocol}://foo.bar/baz.img" + expect { convert }.to raise_error do |error| + expect(error).to be_a Grover::JavaScript::RequestFailedError + expect(error.message).to eq "net::ERR_NAME_NOT_RESOLVED at #{protocol}://foo.bar/baz.img" + expect(error.error_details).to eq [{ + url: "#{protocol}://foo.bar/baz.img", + reason: 'net::ERR_NAME_NOT_RESOLVED', + message: "net::ERR_NAME_NOT_RESOLVED at #{protocol}://foo.bar/baz.img" + }] + end end end @@ -715,18 +720,28 @@ HTML end - let(:error_message) do + let(:error_details) do if puppeteer_version_on_or_after? '22.6.0' - 'net::ERR_BLOCKED_BY_ORB at https://google.com/404.jpg' + { + url: 'https://google.com/404.jpg', + reason: 'net::ERR_BLOCKED_BY_ORB', + message: 'net::ERR_BLOCKED_BY_ORB at https://google.com/404.jpg' + } else - '404 https://google.com/404.jpg' + { + url: 'https://google.com/404.jpg', + status: 404, + message: '404 https://google.com/404.jpg' + } end end it do - expect do - convert - end.to raise_error Grover::JavaScript::RequestFailedError, error_message + expect { convert }.to raise_error do |error| + expect(error).to be_a Grover::JavaScript::RequestFailedError + expect(error.message).to eq error_details[:message] + expect(error.error_details).to eq [error_details] + end end end @@ -769,6 +784,49 @@ end end + context 'when raise on JS error option is specified' do + let(:options) { basic_header_footer_options.merge('raiseOnJSError' => raise_errors) } + let(:raise_errors) { true } + + context 'when a failure occurs it raises an error' do + let(:url_or_html) do + <<-HTML + + + + Success? + + + + + HTML + end + + it do + expect { convert }.to raise_error do |error| + expect(error).to be_a Grover::JavaScript::PageRenderError + expect(error.message).to eq "Unexpected identifier 'went'\nReally wrong" + expect(error.error_details).to eq [ + { + type: 'SyntaxError', + message: "Unexpected identifier 'went'" + }, + { + type: 'Error', + message: 'Really wrong' + } + ] + end + end + + context 'when `raiseOnJSError` is disabled' do + let(:raise_errors) { false } + + it { expect(pdf_text_content).to eq "#{date} Success? #{protocol}://www.example.net/foo/bar 1/1" } + end + end + end + context 'when waitForSelector option is specified with options' do let(:url_or_html) do <<-HTML diff --git a/spec/grover/utils_spec.rb b/spec/grover/utils_spec.rb index 8a42074..a47d089 100644 --- a/spec/grover/utils_spec.rb +++ b/spec/grover/utils_spec.rb @@ -298,12 +298,24 @@ it { is_expected.to eq('fooBar' => 'baz') } end - context 'when key has an acronym in it' do - let(:object) { { prefer_css_page_size: true, bypass_csp: false, extra_http_headers: { 'Foo' => 'Bar' } } } + context 'when keys have acronyms in them' do + let(:object) do + { + prefer_css_page_size: true, + bypass_csp: false, + extra_http_headers: { 'Foo' => 'Bar' }, + raise_on_js_error: true + } + end it 'returns the acronym components of the keys uppercase' do expect(normalize_object).to( - eq('preferCSSPageSize' => true, 'bypassCSP' => false, 'extraHTTPHeaders' => { 'foo' => 'Bar' }) + eq( + 'preferCSSPageSize' => true, + 'bypassCSP' => false, + 'extraHTTPHeaders' => { 'foo' => 'Bar' }, + 'raiseOnJSError' => true + ) ) end @@ -312,7 +324,12 @@ it 'returns the acronym components of the keys uppercase (but does not transform the extraHTTPHeaders value' do expect(normalize_object).to( - eq('preferCSSPageSize' => true, 'bypassCSP' => false, 'extraHTTPHeaders' => { 'Foo' => 'Bar' }) + eq( + 'preferCSSPageSize' => true, + 'bypassCSP' => false, + 'extraHTTPHeaders' => { 'Foo' => 'Bar' }, + 'raiseOnJSError' => true + ) ) end end