From b996f358895b9a8fc79f07308562bfeb1f4af540 Mon Sep 17 00:00:00 2001 From: Zach Bloomquist Date: Fri, 1 Nov 2019 16:58:22 -0400 Subject: [PATCH] Support asynchronous followRedirect filter (callback or promise) Cherry-pick 47cdc67085c9fddc8d39d3172538f3f86c96bb8b Co-authored-by: Brian Reavis --- README.md | 5 +- lib/redirect.js | 217 +++++++++++++++++--------------- request.js | 273 +++++++++++++++++++++-------------------- tests/test-redirect.js | 100 +++++++++++++++ 4 files changed, 360 insertions(+), 235 deletions(-) diff --git a/README.md b/README.md index 99584c2d3..ab099854b 100644 --- a/README.md +++ b/README.md @@ -932,7 +932,10 @@ The first argument can be either a `url` or an `options` object. The only requir --- -- `followRedirect` - follow HTTP 3xx responses as redirects (default: `true`). This property can also be implemented as function which gets `response` object as a single argument and should return `true` if redirects should continue or `false` otherwise. +- `followRedirect` - follow HTTP 3xx responses as redirects (default: `true`). This property can also be implemented as function which gets `response` object as the first argument. + - *(synchronous usage)* It should return `true` if redirects should continue or `false` otherwise. If it returns a url string, the destination of the redirect will be overridden. + - *(async callback usage)* If the function has two arguments, it will be treated as an asynchronous function and will be passed a callback as the second argument. Invoke the callback with an error (`null` if no error) and the boolean/url result as the second. + - *(async promise usage)* Return a promise that resolves to the boolean/url result. - `followAllRedirects` - follow non-GET HTTP 3xx responses as redirects (default: `false`) - `followOriginalHttpMethod` - by default we redirect to HTTP method GET. you can enable this property to redirect to the original HTTP method (default: `false`) - `followAuthorizationHeader` - retain `authorization` header when a redirect happens to a different hostname (default: `false`) diff --git a/lib/redirect.js b/lib/redirect.js index 4d5fbb31f..47bf0ad28 100644 --- a/lib/redirect.js +++ b/lib/redirect.js @@ -83,136 +83,157 @@ Redirect.prototype.redirectTo = function (response) { return redirectTo } -Redirect.prototype.onResponse = function (response) { +Redirect.prototype.onResponse = function (response, callback) { var self = this var request = self.request var urlParser = request.urlParser var options = {} var redirectTo = self.redirectTo(response) - if (!redirectTo || !self.allowRedirect.call(request, response)) { - return false - } + if (!redirectTo) return callback(null, false) + + function processRedirect (shouldRedirect) { + if (!shouldRedirect) return callback(null, false) + if (typeof shouldRedirect === 'string') { + // overridden redirect url + request.debug('redirect overridden', redirectTo) + redirectTo = shouldRedirect + } - request.debug('redirect to', redirectTo) + request.debug('redirect to', redirectTo) - // ignore any potential response body. it cannot possibly be useful - // to us at this point. - // response.resume should be defined, but check anyway before calling. Workaround for browserify. - if (response.resume) { - response.resume() - } + // ignore any potential response body. it cannot possibly be useful + // to us at this point. + // response.resume should be defined, but check anyway before calling. Workaround for browserify. + if (response.resume) { + response.resume() + } - if (self.redirectsFollowed >= self.maxRedirects) { - request.emit('error', new Error('Exceeded maxRedirects. Probably stuck in a redirect loop ' + request.uri.href)) - return false - } - self.redirectsFollowed += 1 + if (self.redirectsFollowed >= self.maxRedirects) { + return callback(new Error('Exceeded maxRedirects. Probably stuck in a redirect loop ' + request.uri.href)) + } + self.redirectsFollowed += 1 - if (!isUrl.test(redirectTo)) { - redirectTo = urlParser.resolve(request.uri.href, redirectTo) - } + if (!isUrl.test(redirectTo)) { + redirectTo = urlParser.resolve(request.uri.href, redirectTo) + } - var uriPrev = request.uri - request.uri = urlParser.parse(redirectTo) + var uriPrev = request.uri + request.uri = urlParser.parse(redirectTo) - // handle the case where we change protocol from https to http or vice versa - if (request.uri.protocol !== uriPrev.protocol) { - delete request.agent - } + // handle the case where we change protocol from https to http or vice versa + if (request.uri.protocol !== uriPrev.protocol) { + delete request.agent + } - self.redirects.push({ statusCode: response.statusCode, redirectUri: redirectTo }) + self.redirects.push({ statusCode: response.statusCode, redirectUri: redirectTo }) - // if the redirect hostname (not just port or protocol) is changed: - // 1. remove host header, the new host will be populated on request.init - // 2. remove authorization header, avoid authentication leak - // @note: This is done because of security reasons, irrespective of the - // status code or request method used. - if (request.headers && uriPrev.hostname !== request.uri.hostname) { - request.removeHeader('host') + // if the redirect hostname (not just port or protocol) is changed: + // 1. remove host header, the new host will be populated on request.init + // 2. remove authorization header, avoid authentication leak + // @note: This is done because of security reasons, irrespective of the + // status code or request method used. + if (request.headers && uriPrev.hostname !== request.uri.hostname) { + request.removeHeader('host') - // use followAuthorizationHeader option to retain authorization header - if (!self.followAuthorizationHeader) { - request.removeHeader('authorization') + // use followAuthorizationHeader option to retain authorization header + if (!self.followAuthorizationHeader) { + request.removeHeader('authorization') + } } - } - delete request.src - delete request.req - delete request._started - - // Switch request method to GET - // - if followOriginalHttpMethod is not set [OVERRIDE] - // - or, statusCode code is not 401, 307 or 308 [STANDARD] - // - also, remove request body for the GET redirect [STANDARD] - // @note: when followOriginalHttpMethod is set, - // it will always retain the request body irrespective of the method (say GET) or status code (any 3XX). - if (!self.followOriginalHttpMethod && - response.statusCode !== 401 && response.statusCode !== 307 && response.statusCode !== 308) { - // force all redirects to use GET (legacy reasons) - // but, HEAD is considered as a safe method so, the method is retained. - if (request.method !== 'HEAD') { - request.method = 'GET' + delete request.src + delete request.req + delete request._started + + // Switch request method to GET + // - if followOriginalHttpMethod is not set [OVERRIDE] + // - or, statusCode code is not 401, 307 or 308 [STANDARD] + // - also, remove request body for the GET redirect [STANDARD] + // @note: when followOriginalHttpMethod is set, + // it will always retain the request body irrespective of the method (say GET) or status code (any 3XX). + if (!self.followOriginalHttpMethod && + response.statusCode !== 401 && response.statusCode !== 307 && response.statusCode !== 308) { + // force all redirects to use GET (legacy reasons) + // but, HEAD is considered as a safe method so, the method is retained. + if (request.method !== 'HEAD') { + request.method = 'GET' + } + + // Remove parameters from the previous response, unless this is the second request + // for a server that requires digest authentication. + delete request.body + delete request._form + delete request._multipart + if (request.headers) { + request.removeHeader('content-type') + request.removeHeader('content-length') + } } - // Remove parameters from the previous response, unless this is the second request - // for a server that requires digest authentication. - delete request.body - delete request._form - delete request._multipart - if (request.headers) { + // Restore form-data stream if request body is retained + if (request.formData && + // make sure _form is released and there's no pending _streams left + // which will be the case for 401 redirects. so, reuse _form on redirect + // @note: multiple form-param / file-streams may cause following issue: + // https://github.com/request/request/issues/887 + // @todo: expose stream errors as events + request._form && request._form._released && + request._form._streams && !request._form._streams.length) { + // reinitialize FormData stream for 307 or 308 redirects + delete request._form + // remove content-type header for new boundary request.removeHeader('content-type') + // remove content-length header since FormValue may be dropped if its not a file stream request.removeHeader('content-length') - } - } - // Restore form-data stream if request body is retained - if (request.formData && - // make sure _form is released and there's no pending _streams left - // which will be the case for 401 redirects. so, reuse _form on redirect - // @note: multiple form-param / file-streams may cause following issue: - // https://github.com/request/request/issues/887 - // @todo: expose stream errors as events - request._form && request._form._released && - request._form._streams && !request._form._streams.length) { - // reinitialize FormData stream for 307 or 308 redirects - delete request._form - // remove content-type header for new boundary - request.removeHeader('content-type') - // remove content-length header since FormValue may be dropped if its not a file stream - request.removeHeader('content-length') - - var formData = [] - var resetFormData = function (key, value, paramOptions) { - // if `value` is of type stream - if (typeof (value && value.pipe) === 'function') { - // bail out if not a file stream - if (!(value.hasOwnProperty('fd') && value.path)) return - // create new file stream - value = fs.createReadStream(value.path) + var formData = [] + var resetFormData = function (key, value, paramOptions) { + // if `value` is of type stream + if (typeof (value && value.pipe) === 'function') { + // bail out if not a file stream + if (!(value.hasOwnProperty('fd') && value.path)) return + // create new file stream + value = fs.createReadStream(value.path) + } + + formData.push({key: key, value: value, options: paramOptions}) + } + for (var i = 0, ii = request.formData.length; i < ii; i++) { + var formParam = request.formData[i] + if (!formParam) { continue } + resetFormData(formParam.key, formParam.value, formParam.options) } - formData.push({key: key, value: value, options: paramOptions}) + // setting `options.formData` will reinitialize FormData in `request.init` + options.formData = formData } - for (var i = 0, ii = request.formData.length; i < ii; i++) { - var formParam = request.formData[i] - if (!formParam) { continue } - resetFormData(formParam.key, formParam.value, formParam.options) + + if (!self.removeRefererHeader) { + request.setHeader('Referer', uriPrev.href) } - // setting `options.formData` will reinitialize FormData in `request.init` - options.formData = formData + request.emit('redirect') + request.init(options) + callback(null, true) } - if (!self.removeRefererHeader) { - request.setHeader('Referer', uriPrev.href) + // test allowRedirect arity; if has more than one argument, + // assume it's asynchronous via a callback + if (self.allowRedirect.length > 1) { + return self.allowRedirect.call(request, response, function (err, result) { + if (err) return callback(err) + processRedirect(result) + }) } - request.emit('redirect') - - request.init(options) + var allowsRedirect = self.allowRedirect.call(request, response) + if (allowsRedirect && allowsRedirect.then) { + return allowsRedirect.then(processRedirect, callback) + } - return true + // treat as a regular boolean + processRedirect(allowsRedirect) } exports.Redirect = Redirect diff --git a/request.js b/request.js index c48ec646d..a1203707c 100644 --- a/request.js +++ b/request.js @@ -1366,164 +1366,165 @@ Request.prototype.onRequestResponse = function (response) { self.clearTimeout() function responseHandler () { - if (self._redirect.onResponse(response)) { - return // Ignore the rest of the response - } + self._redirect.onResponse(response, function (err, followingRedirect) { + if (!err && followingRedirect) return // Ignore the rest of the response + if (err) self.emit('error', err) + + // Be a good stream and emit end when the response is finished. + // Hack to emit end on close because of a core bug that never fires end + response.once('close', function () { + if (!self._ended) { + self.response.emit('end') + } + }) - // Be a good stream and emit end when the response is finished. - // Hack to emit end on close because of a core bug that never fires end - response.once('close', function () { - if (!self._ended) { - self.response.emit('end') + response.once('end', function () { + self._ended = true + }) + + var noBody = function (code) { + return ( + self.method === 'HEAD' || + // Informational + (code >= 100 && code < 200) || + // No Content + code === 204 || + // Not Modified + code === 304 + ) } - }) - response.once('end', function () { - self._ended = true - }) + var responseContent + var downloadSizeTracker = new SizeTrackerStream() - var noBody = function (code) { - return ( - self.method === 'HEAD' || - // Informational - (code >= 100 && code < 200) || - // No Content - code === 204 || - // Not Modified - code === 304 - ) - } - - var responseContent - var downloadSizeTracker = new SizeTrackerStream() - - if ((self.gzip || self.brotli) && !noBody(response.statusCode)) { - var contentEncoding = response.headers['content-encoding'] || 'identity' - contentEncoding = contentEncoding.trim().toLowerCase() - - // Be more lenient with decoding compressed responses, since (very rarely) - // servers send slightly invalid gzip responses that are still accepted - // by common browsers. - // Always using Z_SYNC_FLUSH is what cURL does. - var zlibOptions = { - flush: zlib.Z_SYNC_FLUSH, - finishFlush: zlib.Z_SYNC_FLUSH - } + if ((self.gzip || self.brotli) && !noBody(response.statusCode)) { + var contentEncoding = response.headers['content-encoding'] || 'identity' + contentEncoding = contentEncoding.trim().toLowerCase() - if (self.gzip && contentEncoding === 'gzip') { - responseContent = zlib.createGunzip(zlibOptions) - response.pipe(downloadSizeTracker).pipe(responseContent) - } else if (self.gzip && contentEncoding === 'deflate') { - responseContent = inflate.createInflate(zlibOptions) - response.pipe(downloadSizeTracker).pipe(responseContent) - } else if (self.brotli && contentEncoding === 'br') { - responseContent = brotli.createBrotliDecompress() - response.pipe(downloadSizeTracker).pipe(responseContent) - } else { - // Since previous versions didn't check for Content-Encoding header, - // ignore any invalid values to preserve backwards-compatibility - if (contentEncoding !== 'identity') { - debug('ignoring unrecognized Content-Encoding ' + contentEncoding) + // Be more lenient with decoding compressed responses, since (very rarely) + // servers send slightly invalid gzip responses that are still accepted + // by common browsers. + // Always using Z_SYNC_FLUSH is what cURL does. + var zlibOptions = { + flush: zlib.Z_SYNC_FLUSH, + finishFlush: zlib.Z_SYNC_FLUSH + } + + if (self.gzip && contentEncoding === 'gzip') { + responseContent = zlib.createGunzip(zlibOptions) + response.pipe(downloadSizeTracker).pipe(responseContent) + } else if (self.gzip && contentEncoding === 'deflate') { + responseContent = inflate.createInflate(zlibOptions) + response.pipe(downloadSizeTracker).pipe(responseContent) + } else if (self.brotli && contentEncoding === 'br') { + responseContent = brotli.createBrotliDecompress() + response.pipe(downloadSizeTracker).pipe(responseContent) + } else { + // Since previous versions didn't check for Content-Encoding header, + // ignore any invalid values to preserve backwards-compatibility + if (contentEncoding !== 'identity') { + debug('ignoring unrecognized Content-Encoding ' + contentEncoding) + } + responseContent = response.pipe(downloadSizeTracker) } + } else { responseContent = response.pipe(downloadSizeTracker) } - } else { - responseContent = response.pipe(downloadSizeTracker) - } - if (self.encoding) { - if (self.dests.length !== 0) { - console.error('Ignoring encoding parameter as this stream is being piped to another stream which makes the encoding option invalid.') - } else { - responseContent.setEncoding(self.encoding) + if (self.encoding) { + if (self.dests.length !== 0) { + console.error('Ignoring encoding parameter as this stream is being piped to another stream which makes the encoding option invalid.') + } else { + responseContent.setEncoding(self.encoding) + } } - } - // Node by default returns the status message with `latin1` character encoding, - // which results in characters lying outside the range of `U+0000 to U+00FF` getting truncated - // so that they can be mapped in the given range. - // Refer: https://nodejs.org/docs/latest-v12.x/api/buffer.html#buffer_buffers_and_character_encodings - // - // Exposing `statusMessageEncoding` option to make encoding type configurable. - // This would help in correctly representing status messages belonging to range outside of `latin1` - // - // @note: The Regex `[^\w\s-']` is tested to prevent unnecessary computation of creating a Buffer and - // then decoding it when the status message consists of common characters, - // specifically belonging to the following set: [a-z, A-Z, 0-9, -, _ ', whitespace] - // As in that case, no matter what the encoding type is used for decoding the buffer, the result would remain the same. - // - // @note: Providing a value in this option will result in force re-encoding of the status message - // which may not always be intended by the server - specifically in cases where - // server returns a status message which when encoded again with a different character encoding - // results in some other characters. - // For example: If the server intentionally responds with `ð\x9F\x98\x8A` as status message - // but if the statusMessageEncoding option is set to `utf8`, then it would get converted to '😊'. - var statusMessage = String(response.statusMessage) - if (self.statusMessageEncoding && /[^\w\s-']/.test(statusMessage)) { - response.statusMessage = Buffer.from(statusMessage, 'latin1').toString(self.statusMessageEncoding) - } + // Node by default returns the status message with `latin1` character encoding, + // which results in characters lying outside the range of `U+0000 to U+00FF` getting truncated + // so that they can be mapped in the given range. + // Refer: https://nodejs.org/docs/latest-v12.x/api/buffer.html#buffer_buffers_and_character_encodings + // + // Exposing `statusMessageEncoding` option to make encoding type configurable. + // This would help in correctly representing status messages belonging to range outside of `latin1` + // + // @note: The Regex `[^\w\s-']` is tested to prevent unnecessary computation of creating a Buffer and + // then decoding it when the status message consists of common characters, + // specifically belonging to the following set: [a-z, A-Z, 0-9, -, _ ', whitespace] + // As in that case, no matter what the encoding type is used for decoding the buffer, the result would remain the same. + // + // @note: Providing a value in this option will result in force re-encoding of the status message + // which may not always be intended by the server - specifically in cases where + // server returns a status message which when encoded again with a different character encoding + // results in some other characters. + // For example: If the server intentionally responds with `ð\x9F\x98\x8A` as status message + // but if the statusMessageEncoding option is set to `utf8`, then it would get converted to '😊'. + var statusMessage = String(response.statusMessage) + if (self.statusMessageEncoding && /[^\w\s-']/.test(statusMessage)) { + response.statusMessage = Buffer.from(statusMessage, 'latin1').toString(self.statusMessageEncoding) + } - if (self._paused) { - responseContent.pause() - } + if (self._paused) { + responseContent.pause() + } - self.responseContent = responseContent + self.responseContent = responseContent - self.emit('response', response) + self.emit('response', response) - self.dests.forEach(function (dest) { - self.pipeDest(dest) - }) + self.dests.forEach(function (dest) { + self.pipeDest(dest) + }) - var responseThresholdEnabled = false - var responseBytesLeft + var responseThresholdEnabled = false + var responseBytesLeft - if (typeof self.maxResponseSize === 'number') { - responseThresholdEnabled = true - responseBytesLeft = self.maxResponseSize - } + if (typeof self.maxResponseSize === 'number') { + responseThresholdEnabled = true + responseBytesLeft = self.maxResponseSize + } - responseContent.on('data', function (chunk) { - if (self.timing && !self.responseStarted) { - self.responseStartTime = (new Date()).getTime() + responseContent.on('data', function (chunk) { + if (self.timing && !self.responseStarted) { + self.responseStartTime = (new Date()).getTime() - // NOTE: responseStartTime is deprecated in favor of .timings - response.responseStartTime = self.responseStartTime - } - // if response threshold is set, update the response bytes left to hit - // threshold. If exceeds, abort the request. - if (responseThresholdEnabled) { - responseBytesLeft -= chunk.length - if (responseBytesLeft < 0) { - self.emit('error', new Error('Maximum response size reached')) - self.destroy() - self.abort() - return + // NOTE: responseStartTime is deprecated in favor of .timings + response.responseStartTime = self.responseStartTime } - } - self._destdata = true - self.emit('data', chunk) - }) - responseContent.once('end', function (chunk) { - self._reqResInfo.response.downloadedBytes = downloadSizeTracker.size - self.emit('end', chunk) - }) - responseContent.on('error', function (error) { - self.emit('error', error) - }) - responseContent.on('close', function () { self.emit('close') }) - - if (self.callback) { - self.readResponseBody(response) - } else { // if no callback - self.on('end', function () { - if (self._aborted) { - debug('aborted', self.uri.href) - return + // if response threshold is set, update the response bytes left to hit + // threshold. If exceeds, abort the request. + if (responseThresholdEnabled) { + responseBytesLeft -= chunk.length + if (responseBytesLeft < 0) { + self.emit('error', new Error('Maximum response size reached')) + self.destroy() + self.abort() + return + } } - self.emit('complete', response) + self._destdata = true + self.emit('data', chunk) }) - } + responseContent.once('end', function (chunk) { + self._reqResInfo.response.downloadedBytes = downloadSizeTracker.size + self.emit('end', chunk) + }) + responseContent.on('error', function (error) { + self.emit('error', error) + }) + responseContent.on('close', function () { self.emit('close') }) + + if (self.callback) { + self.readResponseBody(response) + } else { // if no callback + self.on('end', function () { + if (self._aborted) { + debug('aborted', self.uri.href) + return + } + self.emit('complete', response) + }) + } + }) } function forEachAsync (items, fn, cb) { diff --git a/tests/test-redirect.js b/tests/test-redirect.js index 65b6771a0..a0fdb19cc 100644 --- a/tests/test-redirect.js +++ b/tests/test-redirect.js @@ -6,6 +6,7 @@ var request = require('../index') var tape = require('tape') var http = require('http') var destroyable = require('server-destroy') +var Promise = require('bluebird') var s = server.createServer() var ss = server.createSSLServer() @@ -429,6 +430,105 @@ tape('triple bounce terminated after second redirect', function (t) { }) }) +tape('asynchronous followRedirect filter via callback', function (t) { + function filterAsync (response, callback) { + setImmediate(function () { callback(null, true) }) + return false // it should ignore this due to the function arity + } + + hits = {} + request({ + uri: s.url + '/temp', + jar: jar, + headers: { cookie: 'foo=bar' }, + followRedirect: filterAsync + }, function (err, res, body) { + t.equal(err, null) + t.equal(res.statusCode, 200) + t.ok(hits.temp, 'Original request is to /temp') + t.ok(hits.temp_landing, 'Forward to temporary landing URL') + t.equal(body, 'GET temp_landing', 'Got temporary landing content') + t.end() + }) +}) + +tape('asynchronous followRedirect filter via callback with error', function (t) { + var sampleError = new Error('Error during followRedirect callback') + function filterAsync (response, callback) { + setImmediate(function () { callback(sampleError) }) + } + + hits = {} + request({ + uri: s.url + '/temp', + jar: jar, + headers: { cookie: 'foo=bar' }, + followRedirect: filterAsync + }, function (err, res, body) { + t.equal(err, sampleError) + t.end() + }) +}) + +tape('asynchronous followRedirect filter via promise', function (t) { + function filterPromise (response) { + return Promise.resolve(false) + } + + hits = {} + request({ + uri: s.url + '/temp', + jar: jar, + headers: { cookie: 'foo=bar' }, + followRedirect: filterPromise + }, function (err, res, body) { + t.equal(err, null) + t.equal(res.statusCode, 301) + t.ok(hits.temp, 'Original request is to /temp') + t.ok(!hits.temp_landing, 'No chasing the redirect promise returns false') + t.equal(res.statusCode, 301, 'Response is the bounce itself') + t.end() + }) +}) + +tape('asynchronous followRedirect filter via promise (rejected)', function (t) { + var sampleError = new Error('Rejected followRedirect promise') + function filterPromise (response) { + return Promise.reject(sampleError) + } + + hits = {} + request({ + uri: s.url + '/temp', + jar: jar, + headers: { cookie: 'foo=bar' }, + followRedirect: filterPromise + }, function (err, res, body) { + t.equal(err, sampleError) + t.end() + }) +}) + +tape('overridden redirect url', function (t) { + hits = {} + request({ + uri: s.url + '/temp', + jar: jar, + headers: { cookie: 'foo=bar' }, + followRedirect: function (response) { + if (/temp_landing/.test(response.headers.location)) return s.url + '/perm' + return true + } + }, function (err, res, body) { + t.equal(err, null) + t.equal(res.statusCode, 200) + t.ok(hits.perm, 'Original request to /perm') + t.ok(hits.perm_landing, 'Forward to permanent landing URL') + t.equal(body, 'GET perm_landing', 'Got permanent landing content') + t.end() + }) +}) + tape('http to https redirect', function (t) { hits = {} request.get({