From 1ab6ece475804bf2919e55ec9ca86d905167cbf9 Mon Sep 17 00:00:00 2001 From: fengmk2 Date: Thu, 20 Aug 2015 17:26:53 +0800 Subject: [PATCH] feat: add options.fixJSONCtlChars to fix JSON control characters Detail please see #77 --- README.md | 1 + lib/urllib.js | 31 +++++++++++++++++++++++++++++-- test/fixtures/server.js | 2 ++ test/urllib.test.js | 31 +++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 83e2cc32..d2f9a5d0 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,7 @@ httpclient.request('http://nodejs.org', function (err, body) { - ***writeStream*** [stream.Writable](http://nodejs.org/api/stream.html#stream_class_stream_writable) - A writable stream to be piped by the response stream. Responding data will be write to this stream and `callback` will be called with `data` set `null` after finished writing. - ***contentType*** String - Type of request data. Could be `json`. If it's `json`, will auto set `Content-Type: application/json` header. - ***dataType*** String - Type of response data. Could be `text` or `json`. If it's `text`, the `callback`ed `data` would be a String. If it's `json`, the `data` of callback would be a parsed JSON Object. Default `callback`ed `data` would be a `Buffer`. + - **fixJSONCtlChars** Boolean - Fix the control characters (U+0000 through U+001F) before JSON parse response. Default is `false`. - ***headers*** Object - Request headers. - ***timeout*** Number - Request timeout in milliseconds. Defaults to `exports.TIMEOUT`. Include remote server connecting timeout and response timeout. When timeout happen, will return `ConnectionTimeout` or `ResponseTimeout`. - ***auth*** String - `username:password` used in HTTP Basic Authorization. diff --git a/lib/urllib.js b/lib/urllib.js index baf0fdc0..da1c5b68 100644 --- a/lib/urllib.js +++ b/lib/urllib.js @@ -84,6 +84,8 @@ var TEXT_DATA_TYPES = [ * - {String} [method]: optional, could be GET | POST | DELETE | PUT, default is GET * - {String} [contentType]: optional, request data type, could be `json`, default is undefined * - {String} [dataType]: optional, response data type, could be `text` or `json`, default is buffer + * - {Boolean} [fixJSONCtlChars]: optional, fix the control characters (U+0000 through U+001F) + * before JSON parse response. Default is `false` * - {Object} [headers]: optional, request headers * - {Number} [timeout]: request timeout(in milliseconds), default is `exports.TIMEOUT` * - {Agent} [agent]: optional, http agent. Set `false` if you does not use agent. @@ -186,6 +188,7 @@ exports.requestWithCallback = function (url, args, callback) { var port = parsedUrl.port || 80; var httplib = http; var agent = args.agent || exports.agent; + var fixJSONCtlChars = !!args.fixJSONCtlChars; if (parsedUrl.protocol === 'https:') { httplib = https; @@ -569,7 +572,7 @@ exports.requestWithCallback = function (url, args, callback) { if (responseSize === 0) { data = null; } else { - var r = parseJSON(data); + var r = parseJSON(data, fixJSONCtlChars); if (r.error) { err = r.error; } else { @@ -665,11 +668,35 @@ exports.requestWithCallback = function (url, args, callback) { return req; }; -function parseJSON(data) { +var JSONCtlCharsMap = { + '"': '\\"', // \u0022 + '\\': '\\\\', // \u005c + '\b': '\\b', // \u0008 + '\f': '\\f', // \u000c + '\n': '\\n', // \u000a + '\r': '\\r', // \u000d + '\t': '\\t' // \u0009 +}; +var JSONCtlCharsRE = /[\u0000-\u001F\u005C]/g; + +function _replaceOneChar(c) { + return JSONCtlCharsMap[c] || '\\u' + (c.charCodeAt(0) + 0x10000).toString(16).substr(1); +} + +function replaceJSONCtlChars(str) { + return str.replace(JSONCtlCharsRE, _replaceOneChar); +} + +function parseJSON(data, fixJSONCtlChars) { var result = { error: null, data: null }; + if (fixJSONCtlChars) { + // https://github.com/node-modules/urllib/pull/77 + // remote the control characters (U+0000 through U+001F) + data = replaceJSONCtlChars(data); + } try { result.data = JSON.parse(data); } catch (err) { diff --git a/test/fixtures/server.js b/test/fixtures/server.js index 904e6e29..6e2f0e58 100644 --- a/test/fixtures/server.js +++ b/test/fixtures/server.js @@ -159,6 +159,8 @@ var server = http.createServer(function (req, res) { } else if (req.url === '/errorcharset') { res.setHeader('Content-Type', 'text/plain;charset=notfound'); return res.end('你好'); + } else if (req.url === '/json_with_controls_unicode') { + return res.end(new Buffer('{"foo":"bar\u000e!1!\u0086!2!\u0000!3!\u001F!4!\u005C!5!end\u005C\\"}')); } var url = req.url.split('?'); diff --git a/test/urllib.test.js b/test/urllib.test.js index f4a235f4..d8febc8e 100644 --- a/test/urllib.test.js +++ b/test/urllib.test.js @@ -1161,4 +1161,35 @@ describe('urllib.test.js', function () { }); }); }); + + describe('options.fixJSONCtlChars = true | false', function () { + + it('should auto fix json control characters', function (done) { + urllib.request(host + '/json_with_controls_unicode', { + dataType: 'json', + fixJSONCtlChars: true, + }, function (err, data) { + should.not.exist(err); + data.should.eql({ + foo: 'bar\u000e!1!†!2!\u0000!3!\u001f!4!\\!5!end\\\\' + }); + done(); + }); + }); + + it('should throw error when response has json control characters', function (done) { + urllib.request(host + '/json_with_controls_unicode', { + dataType: 'json', + // fixJSONCtlChars: false, + }, function (err, data) { + should.exist(err); + err.name.should.equal('JSONResponseFormatError'); + err.message.should.containEql('Unexpected token \u000e (data json format:'); + data.should.equal('{"foo":"bar\u000e!1!†!2!\u0000!3!\u001f!4!\\!5!end\\\\"}'); + done(); + }); + }); + + }); + });