Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Limit response size #69

Merged
merged 2 commits into from
Dec 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 152 additions & 80 deletions src/http.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ Zotero.HTTP = new function() {
};
this.TimeoutError.prototype = Object.create(Error.prototype);

this.ResponseSizeError = function(url) {
this.message = `${url} response exceeds max size`;
};
this.ResponseSizeError.prototype = Object.create(Error.prototype);

/**
* Get a promise for a HTTP request
*
Expand All @@ -67,7 +72,7 @@ Zotero.HTTP = new function() {
* - headers {Object}
* - statusCode {Number}
*/
this.request = function(method, requestURL, options = {}) {
this.request = async function(method, requestURL, options = {}) {
// Default options
options = Object.assign({
body: null,
Expand All @@ -77,7 +82,8 @@ Zotero.HTTP = new function() {
logBodyLength: 1024,
timeout: 15000,
responseType: '',
successCodes: null
successCodes: null,
maxResponseSize: 50 * 1024 * 1024
}, options);

options.headers = Object.assign({
Expand All @@ -100,7 +106,7 @@ Zotero.HTTP = new function() {
// Allow XHR to set Content-Type with boundary for multipart/form-data
delete options.headers["Content-Type"];
}

logBody = `: ${options.body.substr(0, options.logBodyLength)}` +
options.body.length > options.logBodyLength ? '...' : '';
// TODO: make sure below does its job in every API call instance
Expand All @@ -110,87 +116,81 @@ Zotero.HTTP = new function() {
}
Zotero.debug(`HTTP ${method} ${requestURL}${logBody}`);

return new Promise(function(resolve, reject) {
request({
uri: requestURL,
method,
headers: options.headers,
timeout: options.timeout,
body: options.body,
gzip: true,
followAllRedirects: true,
jar: options.cookieSandbox
}, function(error, response, body) {
if (error) {
return reject(error);
}

// Array of success codes given
if (options.successCodes) {
var success = options.successCodes.includes(response.statusCode);
}
// Explicit FALSE means allow any status code
else if (options.successCodes === false) {
var success = true;
}
// Otherwise, 2xx is success
else {
var success = response.statusCode >= 200 && response.statusCode < 300;
}
if (!success) {
return reject(new Zotero.HTTP.StatusError(requestURL, response.statusCode, response.body));
}
let {response, body} = await customRequest(method, requestURL, options);

if (!response.headers['content-type']) {
return reject(new Error('Missing content-type header'));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this we should also return 400, since we don't want to repeat translation too. It could be something like InvalidResponse. But maybe it's better to wait until we merge #59 because I suspect some changes for content-type filtering.

}

// Array of success codes given
if (options.successCodes) {
var success = options.successCodes.includes(response.statusCode);
}
// Explicit FALSE means allow any status code
else if (options.successCodes === false) {
var success = true;
}
// Otherwise, 2xx is success
else {
var success = response.statusCode >= 200 && response.statusCode < 300;
}
if (!success) {
throw new Zotero.HTTP.StatusError(requestURL, response.statusCode, response.body);
}

if (options.debug) {
Zotero.debug(`HTTP ${response.statusCode} response: ${body}`);
}

var result = {
responseURL: response.request.uri.href,
headers: response.headers,
status: response.statusCode
};

if (options.responseType == 'document') {
let dom = new JSDOM(body, { url: result.responseURL});
wgxpath.install(dom.window, true);
result.response = dom.window.document;

// Follow meta redirects
if (response.headers['content-type']
&& response.headers['content-type'].startsWith('text/html')) {
let meta = result.response.querySelector('meta[http-equiv=refresh]');
if (meta && meta.getAttribute('content')) {
let parts = meta.getAttribute('content').split(/;\s*url=/);
// If there's a redirect to another URL in less than 15 seconds,
// follow it
if (parts.length == 2 && parseInt(parts[0]) <= 15) {
let newURL = parts[1].trim().replace(/^'(.+)'/, '$1');
newURL = url.resolve(requestURL, newURL);

Zotero.debug("Meta refresh to " + newURL);
result = Zotero.HTTP.request(method, newURL, options);
}
}
if (options.debug) {
Zotero.debug(`HTTP ${response.statusCode} response: ${body}`);
}

var result = {
responseURL: response.request.uri.href,
headers: response.headers,
status: response.statusCode
};

if (options.responseType == 'document') {
let dom = new JSDOM(body, {
url: result.responseURL,
// Inform JSDOM what content type it's parsing,
// so it could reject unsupported content types
contentType: response.headers['content-type']
});
wgxpath.install(dom.window, true);
result.response = dom.window.document;

// Follow meta redirects
if (response.headers['content-type']
&& response.headers['content-type'].startsWith('text/html')) {
let meta = result.response.querySelector('meta[http-equiv=refresh]');
if (meta && meta.getAttribute('content')) {
let parts = meta.getAttribute('content').split(/;\s*url=/);
// If there's a redirect to another URL in less than 15 seconds,
// follow it
if (parts.length == 2 && parseInt(parts[0]) <= 15) {
let newURL = parts[1].trim().replace(/^'(.+)'/, '$1');
newURL = url.resolve(requestURL, newURL);

Zotero.debug("Meta refresh to " + newURL);
result = Zotero.HTTP.request(method, newURL, options);
}
}
else if (options.responseType == 'json') {
result.response = JSON.parse(body);
}
else if (!options.responseType || options.responseType == 'text') {
result.response = body;
result.responseText = body;
}
else {
throw new Error("Invalid responseType");
}

return resolve(result);
});
});
}
}
else if (options.responseType == 'json') {
result.response = JSON.parse(body.toString());
}
else if (!options.responseType || options.responseType == 'text') {
body = body.toString();
result.response = body;
result.responseText = body;
}
else {
throw new Error("Invalid responseType");
}

return result;
};

/**
* Load one or more documents
*
Expand Down Expand Up @@ -296,4 +296,76 @@ Zotero.HTTP = new function() {
};
}

module.exports = Zotero.HTTP;
/**
* request.js doesn't support response size limitation, therefore
* we have to do it manually
*
* @param {String} method
* @param {String} requestURL
* @param {Object} options
* @return {Promise<Object>} response, body
*/
function customRequest(method, requestURL, options) {
return new Promise(function (resolve, reject) {
let response;

// Make sure resolve/reject is called only once even if request.js
// is emitting events when it shouldn't
let returned = false;

// Store buffers in array, because concatenation operation is is unbelievably slow
let buffers = [];
let bufferLength = 0;

let req = request({
uri: requestURL,
method,
headers: options.headers,
timeout: options.timeout,
body: options.body,
gzip: true,
followAllRedirects: true,
jar: options.cookieSandbox,
encoding: null // Get body in a buffer
})
.on('error', function (err) {
if (returned) return;
reject(err);
})
.on('data', function (chunk) {
if (returned) return;

bufferLength += chunk.length;
buffers.push(chunk);

if (bufferLength > options.maxResponseSize) {
req.abort();
returned = true;
reject(new Zotero.HTTP.ResponseSizeError(requestURL));
}
})
.on('response', function (res) {
if (returned) return;
response = res;
// Content-length doesn't always exists or it can be a length of a gzipped content,
// but it's still worth to do the initial size check
if (
response.headers['content-length'] !== undefined &&
response.headers['content-length'] > options.maxResponseSize
) {
req.abort();
returned = true;
reject(new Zotero.HTTP.ResponseSizeError(requestURL));
}

// TODO: Filter content-type too
})
.on('end', function () {
if (returned) return;
returned = true;
resolve({response, body: Buffer.concat(buffers, bufferLength)});
});
});
};

module.exports = Zotero.HTTP;
4 changes: 4 additions & 0 deletions src/webSession.js
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ WebSession.prototype.handleURL = async function () {
this.ctx.throw(400, "Remote page not found");
}

if (e instanceof Zotero.HTTP.ResponseSizeError) {
this.ctx.throw(400, "Response exceeds max size");
}

//Parse URL up to '?' for DOI
let doi = Zotero.Utilities.cleanDOI(decodeURIComponent(url).match(/[^\?]+/)[0]);
if (doi) {
Expand Down