diff --git a/README.md b/README.md index c67f27e9..219d63c9 100644 --- a/README.md +++ b/README.md @@ -271,6 +271,23 @@ The `verify` option, if supplied, is called as `verify(req, res, buf, encoding)` where `buf` is a `Buffer` of the raw request body and `encoding` is the encoding of the request. The parsing can be aborted by throwing an error. +##### defaultCharset + +The default charset to parse as, if not specified in content-type. Must be +either `utf-8` or `iso-8859-1`. Defaults to `utf-8`. + +##### charsetSentinel + +Whether to let the value of the `utf8` parameter take precedence as the charset +selector. It requires the form to contain a parameter named `utf8` with a value +of `✓`. Defaults to `false`. + +##### interpretNumericEntities + +Whether to decode numeric entities such as `&#9786;` when parsing an iso-8859-1 +form. Defaults to `false`. + + ## Errors The middlewares provided by this module create errors using the diff --git a/lib/read.js b/lib/read.js index 35688f99..20a18ac8 100644 --- a/lib/read.js +++ b/lib/read.js @@ -122,7 +122,7 @@ function read (req, res, next, parse, debug, options) { str = typeof body !== 'string' && encoding !== null ? 
iconv.decode(body, encoding) : body - req.body = parse(str) + req.body = parse(str, encoding) } catch (err) { next(createError(400, err, { body: str, diff --git a/lib/types/urlencoded.js b/lib/types/urlencoded.js index 6bef1141..1c1ff71d 100644 --- a/lib/types/urlencoded.js +++ b/lib/types/urlencoded.js @@ -45,11 +45,18 @@ function urlencoded (options) { : opts.limit var type = opts.type || 'application/x-www-form-urlencoded' var verify = opts.verify || false + var charsetSentinel = opts.charsetSentinel + var interpretNumericEntities = opts.interpretNumericEntities if (verify !== false && typeof verify !== 'function') { throw new TypeError('option verify must be function') } + var defaultCharset = opts.defaultCharset || 'utf-8' + if (defaultCharset !== 'utf-8' && defaultCharset !== 'iso-8859-1') { + throw new TypeError('option defaultCharset must be either utf-8 or iso-8859-1') + } + // create the appropriate query parser var queryparse = createQueryParser(opts, extended) @@ -58,9 +65,9 @@ function urlencoded (options) { ? typeChecker(type) : type - function parse (body) { + function parse (body, encoding) { return body.length - ? queryparse(body) + ? queryparse(body, encoding) : {} } @@ -92,8 +99,8 @@ function urlencoded (options) { } // assert charset - var charset = getCharset(req) || 'utf-8' - if (charset !== 'utf-8') { + var charset = getCharset(req) || defaultCharset + if (charset !== 'utf-8' && charset !== 'iso-8859-1') { debug('invalid charset') next(createError(415, 'unsupported charset "' + charset.toUpperCase() + '"', { charset: charset, @@ -108,7 +115,9 @@ function urlencoded (options) { encoding: charset, inflate: inflate, limit: limit, - verify: verify + verify: verify, + charsetSentinel: charsetSentinel, + interpretNumericEntities: interpretNumericEntities }) } } @@ -123,6 +132,8 @@ function createQueryParser (options, extended) { var parameterLimit = options.parameterLimit !== undefined ? 
options.parameterLimit : 1000 + var charsetSentinel = options.charsetSentinel + var interpretNumericEntities = options.interpretNumericEntities if (isNaN(parameterLimit) || parameterLimit < 1) { throw new TypeError('option parameterLimit must be a positive number') @@ -134,7 +145,7 @@ function createQueryParser (options, extended) { var depth = extended ? Infinity : 0 - return function queryparse (body) { + return function queryparse (body, encoding) { var paramCount = parameterCount(body, parameterLimit) if (paramCount === undefined) { @@ -152,7 +163,10 @@ function createQueryParser (options, extended) { allowPrototypes: true, arrayLimit: arrayLimit, depth: depth, - parameterLimit: parameterLimit + parameterLimit: parameterLimit, + charsetSentinel: charsetSentinel, + interpretNumericEntities: interpretNumericEntities, + charset: encoding }) } } diff --git a/test/urlencoded.js b/test/urlencoded.js index 8be8a5a0..3258d094 100644 --- a/test/urlencoded.js +++ b/test/urlencoded.js @@ -48,6 +48,74 @@ describe('bodyParser.urlencoded()', function () { .expect(200, '{}', done) }) + var extendedValues = [true, false] + extendedValues.forEach(function (extended) { + describe('in ' + (extended ? 
'extended' : 'simple') + ' mode', function () { + it('should parse x-www-form-urlencoded with an explicit iso-8859-1 encoding', function (done) { + var server = createServer({ extended: extended }) + request(server) + .post('/') + .set('Content-Type', 'application/x-www-form-urlencoded; charset=iso-8859-1') + .send('%A2=%BD') + .expect(200, '{"¢":"½"}', done) + }) + + it('should parse x-www-form-urlencoded with unspecified iso-8859-1 encoding when the defaultCharset is set to iso-8859-1', function (done) { + var server = createServer({ defaultCharset: 'iso-8859-1', extended: extended }) + request(server) + .post('/') + .set('Content-Type', 'application/x-www-form-urlencoded') + .send('%A2=%BD') + .expect(200, '{"¢":"½"}', done) + }) + + it('should parse x-www-form-urlencoded with an unspecified iso-8859-1 encoding when the utf8 sentinel has a value of %26%2310003%3B', function (done) { + var server = createServer({ charsetSentinel: true, extended: extended }) + request(server) + .post('/') + .set('Content-Type', 'application/x-www-form-urlencoded') + .send('utf8=%26%2310003%3B&user=%C3%B8') + .expect(200, '{"user":"Ã¸"}', done) + }) + + it('should parse x-www-form-urlencoded with an unspecified utf-8 encoding when the utf8 sentinel has a value of %E2%9C%93 and the defaultCharset is iso-8859-1', function (done) { + var server = createServer({ charsetSentinel: true, extended: extended }) + request(server) + .post('/') + .set('Content-Type', 'application/x-www-form-urlencoded') + .send('utf8=%E2%9C%93&user=%C3%B8') + .expect(200, '{"user":"ø"}', done) + }) + + it('should not leave an empty string parameter when removing the utf8 sentinel from the start of the string', function (done) { + var server = createServer({ charsetSentinel: true, extended: extended }) + request(server) + .post('/') + .set('Content-Type', 'application/x-www-form-urlencoded') + .send('utf8=%E2%9C%93&foo=bar') + .expect(200, '{"foo":"bar"}', done) + }) + + it('should not leave an empty string 
parameter when removing the utf8 sentinel from the middle of the string', function (done) { + var server = createServer({ charsetSentinel: true, extended: extended }) + request(server) + .post('/') + .set('Content-Type', 'application/x-www-form-urlencoded') + .send('foo=bar&utf8=%E2%9C%93&baz=quux') + .expect(200, '{"foo":"bar","baz":"quux"}', done) + }) + + it('should not leave an empty string parameter when removing the utf8 sentinel from the end of the string', function (done) { + var server = createServer({ charsetSentinel: true, extended: extended }) + request(server) + .post('/') + .set('Content-Type', 'application/x-www-form-urlencoded') + .send('foo=bar&baz=quux&utf8=%E2%9C%93') + .expect(200, '{"foo":"bar","baz":"quux"}', done) + }) + }) + }) + it('should handle empty message-body', function (done) { request(createServer({ limit: '1kb' })) .post('/')