From a4869fecee5c14dc6d9c8b2a7fba44b7ec518209 Mon Sep 17 00:00:00 2001 From: Martynas Bagdonas Date: Sat, 22 Dec 2018 00:06:34 +0200 Subject: [PATCH 1/2] Limit response size --- src/http.js | 227 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 147 insertions(+), 80 deletions(-) diff --git a/src/http.js b/src/http.js index 2bb5188..1d52fca 100644 --- a/src/http.js +++ b/src/http.js @@ -47,6 +47,78 @@ Zotero.HTTP = new function() { }; this.TimeoutError.prototype = Object.create(Error.prototype); + /** + * request.js doesn't support response size limitation, therefore + * we have to do it manually + * + * @param {String} method + * @param {String} requestURL + * @param {Object} options + * @return {Promise} response, body + */ + this.customRequest = function (method, requestURL, options) { + return new Promise(function (resolve, reject) { + let response; + + // Make sure resolve/reject is called only once even if request.js + // is emitting events when it shouldn't + let returned = false; + + // Store buffers in array, because concatenation operation is is unbelievably slow + let buffers = []; + let bufferLength = 0; + + let req = request({ + uri: requestURL, + method, + headers: options.headers, + timeout: options.timeout, + body: options.body, + gzip: true, + followAllRedirects: true, + jar: options.cookieSandbox, + encoding: null // Get body in a buffer + }) + .on('error', function (err) { + if (returned) return; + reject(err); + }) + .on('data', function (chunk) { + if (returned) return; + + bufferLength += chunk.length; + buffers.push(chunk); + + if (bufferLength > options.maxResponseSize) { + req.abort(); + returned = true; + reject(new Error('Response exceeds max size')); + } + }) + .on('response', function (res) { + if (returned) return; + response = res; + // Content-length doesn't always exists or it can be a length of a gzipped content, + // but it's still worth to do the initial size check + if ( + response.headers['content-length'] !== undefined && + response.headers['content-length'] > options.maxResponseSize + ) { + req.abort(); + returned = true; + reject(new Error('Response exceeds max size')); + } + + // TODO: Filter content-type too + }) + .on('end', function () { + if (returned) return; + returned = true; + resolve({response, body: Buffer.concat(buffers, bufferLength)}); + }); + }); + }; + /** * Get a promise for a HTTP request * @@ -67,7 +139,7 @@ Zotero.HTTP = new function() { * - headers {Object} * - statusCode {Number} */ - this.request = function(method, requestURL, options = {}) { + this.request = async function(method, requestURL, options = {}) { // Default options options = Object.assign({ body: null, @@ -77,7 +149,8 @@ Zotero.HTTP = new function() { logBodyLength: 1024, timeout: 15000, responseType: '', - successCodes: null + successCodes: null, + maxResponseSize: 50 * 1024 * 1024 }, options); options.headers = Object.assign({ @@ -100,7 +173,7 @@ Zotero.HTTP = new function() { // Allow XHR to set Content-Type with boundary for multipart/form-data delete options.headers["Content-Type"]; } - + logBody = `: ${options.body.substr(0, options.logBodyLength)}` + options.body.length > options.logBodyLength ? '...' : ''; // TODO: make sure below does its job in every API call instance @@ -110,87 +183,81 @@ Zotero.HTTP = new function() { } Zotero.debug(`HTTP ${method} ${requestURL}${logBody}`); - return new Promise(function(resolve, reject) { - request({ - uri: requestURL, - method, - headers: options.headers, - timeout: options.timeout, - body: options.body, - gzip: true, - followAllRedirects: true, - jar: options.cookieSandbox - }, function(error, response, body) { - if (error) { - return reject(error); - } - - // Array of success codes given - if (options.successCodes) { - var success = options.successCodes.includes(response.statusCode); - } - // Explicit FALSE means allow any status code - else if (options.successCodes === false) { - var success = true; - } - // Otherwise, 2xx is success - else { - var success = response.statusCode >= 200 && response.statusCode < 300; - } - if (!success) { - return reject(new Zotero.HTTP.StatusError(requestURL, response.statusCode, response.body)); - } + let {response, body} = await this.customRequest(method, requestURL, options); + + if (!response.headers['content-type']) { + return reject(new Error('Missing content-type header')); + } + + // Array of success codes given + if (options.successCodes) { + var success = options.successCodes.includes(response.statusCode); + } + // Explicit FALSE means allow any status code + else if (options.successCodes === false) { + var success = true; + } + // Otherwise, 2xx is success + else { + var success = response.statusCode >= 200 && response.statusCode < 300; + } + if (!success) { + throw new Zotero.HTTP.StatusError(requestURL, response.statusCode, response.body); + } - if (options.debug) { - Zotero.debug(`HTTP ${response.statusCode} response: ${body}`); - } - - var result = { - responseURL: response.request.uri.href, - headers: response.headers, - status: response.statusCode - }; - - if (options.responseType == 'document') { - let dom = new JSDOM(body, { url: result.responseURL}); - wgxpath.install(dom.window, true); - result.response = dom.window.document; - - // Follow meta redirects - if (response.headers['content-type'] - && response.headers['content-type'].startsWith('text/html')) { - let meta = result.response.querySelector('meta[http-equiv=refresh]'); - if (meta && meta.getAttribute('content')) { - let parts = meta.getAttribute('content').split(/;\s*url=/); - // If there's a redirect to another URL in less than 15 seconds, - // follow it - if (parts.length == 2 && parseInt(parts[0]) <= 15) { - let newURL = parts[1].trim().replace(/^'(.+)'/, '$1'); - newURL = url.resolve(requestURL, newURL); - - Zotero.debug("Meta refresh to " + newURL); - result = Zotero.HTTP.request(method, newURL, options); - } - } + if (options.debug) { + Zotero.debug(`HTTP ${response.statusCode} response: ${body}`); + } + + var result = { + responseURL: response.request.uri.href, + headers: response.headers, + status: response.statusCode + }; + + if (options.responseType == 'document') { + let dom = new JSDOM(body, { + url: result.responseURL, + // Inform JSDOM what content type it's parsing, + // so it could reject unsupported content types + contentType: response.headers['content-type'] + }); + wgxpath.install(dom.window, true); + result.response = dom.window.document; + + // Follow meta redirects + if (response.headers['content-type'] + && response.headers['content-type'].startsWith('text/html')) { + let meta = result.response.querySelector('meta[http-equiv=refresh]'); + if (meta && meta.getAttribute('content')) { + let parts = meta.getAttribute('content').split(/;\s*url=/); + // If there's a redirect to another URL in less than 15 seconds, + // follow it + if (parts.length == 2 && parseInt(parts[0]) <= 15) { + let newURL = parts[1].trim().replace(/^'(.+)'/, '$1'); + newURL = url.resolve(requestURL, newURL); + + Zotero.debug("Meta refresh to " + newURL); + result = Zotero.HTTP.request(method, newURL, options); } } - else if (options.responseType == 'json') { - result.response = JSON.parse(body); - } - else if (!options.responseType || options.responseType == 'text') { - result.response = body; - result.responseText = body; - } - else { - throw new Error("Invalid responseType"); - } - - return resolve(result); - }); - }); + } + } + else if (options.responseType == 'json') { + result.response = JSON.parse(body.toString()); + } + else if (!options.responseType || options.responseType == 'text') { + body = body.toString(); + result.response = body; + result.responseText = body; + } + else { + throw new Error("Invalid responseType"); + } + return result; }; - + /** * Load one or more documents * @@ -296,4 +363,4 @@ Zotero.HTTP = new function() { }; } -module.exports = Zotero.HTTP; \ No newline at end of file +module.exports = Zotero.HTTP; From 33354064210081cc4202c1f5c5207faa338417e0 Mon Sep 17 00:00:00 2001 From: Martynas Bagdonas Date: Sat, 22 Dec 2018 15:24:58 +0200 Subject: [PATCH 2/2] Move customRequest outside of Zotero.HTTP and add ResponseSize error --- src/http.js | 147 ++++++++++++++++++++++++---------------------- src/webSession.js | 4 ++ 2 files changed, 80 insertions(+), 71 deletions(-) diff --git a/src/http.js b/src/http.js index 1d52fca..ddeaebe 100644 --- a/src/http.js +++ b/src/http.js @@ -47,77 +47,10 @@ Zotero.HTTP = new function() { }; this.TimeoutError.prototype = Object.create(Error.prototype); - /** - * request.js doesn't support response size limitation, therefore - * we have to do it manually - * - * @param {String} method - * @param {String} requestURL - * @param {Object} options - * @return {Promise} response, body - */ - this.customRequest = function (method, requestURL, options) { - return new Promise(function (resolve, reject) { - let response; - - // Make sure resolve/reject is called only once even if request.js - // is emitting events when it shouldn't - let returned = false; - - // Store buffers in array, because concatenation operation is is unbelievably slow - let buffers = []; - let bufferLength = 0; - - let req = request({ - uri: requestURL, - method, - headers: options.headers, - timeout: options.timeout, - body: options.body, - gzip: true, - followAllRedirects: true, - jar: options.cookieSandbox, - encoding: null // Get body in a buffer - }) - .on('error', function (err) { - if (returned) return; - reject(err); - }) - .on('data', function (chunk) { - if (returned) return; - - bufferLength += chunk.length; - buffers.push(chunk); - - if (bufferLength > options.maxResponseSize) { - req.abort(); - returned = true; - reject(new Error('Response exceeds max size')); - } - }) - .on('response', function (res) { - if (returned) return; - response = res; - // Content-length doesn't always exists or it can be a length of a gzipped content, - // but it's still worth to do the initial size check - if ( - response.headers['content-length'] !== undefined && - response.headers['content-length'] > options.maxResponseSize - ) { - req.abort(); - returned = true; - reject(new Error('Response exceeds max size')); - } - - // TODO: Filter content-type too - }) - .on('end', function () { - if (returned) return; - returned = true; - resolve({response, body: Buffer.concat(buffers, bufferLength)}); - }); - }); + this.ResponseSizeError = function(url) { + this.message = `${url} response exceeds max size`; }; + this.ResponseSizeError.prototype = Object.create(Error.prototype); /** * Get a promise for a HTTP request @@ -183,7 +116,7 @@ Zotero.HTTP = new function() { } Zotero.debug(`HTTP ${method} ${requestURL}${logBody}`); - let {response, body} = await this.customRequest(method, requestURL, options); + let {response, body} = await customRequest(method, requestURL, options); if (!response.headers['content-type']) { return reject(new Error('Missing content-type header')); @@ -363,4 +296,76 @@ Zotero.HTTP = new function() { }; } +/** + * request.js doesn't support response size limitation, therefore + * we have to do it manually + * + * @param {String} method + * @param {String} requestURL + * @param {Object} options + * @return {Promise} response, body + */ +function customRequest(method, requestURL, options) { + return new Promise(function (resolve, reject) { + let response; + + // Make sure resolve/reject is called only once even if request.js + // is emitting events when it shouldn't + let returned = false; + + // Store buffers in array, because concatenation operation is is unbelievably slow + let buffers = []; + let bufferLength = 0; + + let req = request({ + uri: requestURL, + method, + headers: options.headers, + timeout: options.timeout, + body: options.body, + gzip: true, + followAllRedirects: true, + jar: options.cookieSandbox, + encoding: null // Get body in a buffer + }) + .on('error', function (err) { + if (returned) return; + reject(err); + }) + .on('data', function (chunk) { + if (returned) return; + + bufferLength += chunk.length; + buffers.push(chunk); + + if (bufferLength > options.maxResponseSize) { + req.abort(); + returned = true; + reject(new Zotero.HTTP.ResponseSizeError(requestURL)); + } + }) + .on('response', function (res) { + if (returned) return; + response = res; + // Content-length doesn't always exists or it can be a length of a gzipped content, + // but it's still worth to do the initial size check + if ( + response.headers['content-length'] !== undefined && + response.headers['content-length'] > options.maxResponseSize + ) { + req.abort(); + returned = true; + reject(new Zotero.HTTP.ResponseSizeError(requestURL)); + } + + // TODO: Filter content-type too + }) + .on('end', function () { + if (returned) return; + returned = true; + resolve({response, body: Buffer.concat(buffers, bufferLength)}); + }); + }); +}; + module.exports = Zotero.HTTP; diff --git a/src/webSession.js b/src/webSession.js index 89240ee..8e01179 100644 --- a/src/webSession.js +++ b/src/webSession.js @@ -175,6 +175,10 @@ WebSession.prototype.handleURL = async function () { this.ctx.throw(400, "Remote page not found"); } + if (e instanceof Zotero.HTTP.ResponseSizeError) { + this.ctx.throw(400, "Response exceeds max size"); + } + //Parse URL up to '?' for DOI let doi = Zotero.Utilities.cleanDOI(decodeURIComponent(url).match(/[^\?]+/)[0]); if (doi) {