Skip to content

Commit

Permalink
Added auto-detection of UTF-7 via BOM. My merge request to add UTF-32…
Browse files Browse the repository at this point in the history
… support to iconv-lite has been approved, so now this project has gone back to using the official release of iconv-lite.
  • Loading branch information
kshetline committed Jun 27, 2019
1 parent cbfea0a commit 4e18f0e
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 45 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ let currentTemperature = parseFloat((await requestText('https://howhotisit.biz/f
.replace(/.*Current temperature:\s*([-.0-9]+).*/is, '$1'));
```

HTTP(S) redirects are automatically handled using [follow-redirects](https://github.com/follow-redirects/follow-redirects), and a wide variety of character encodings are supported using [iconv-lite](https://github.com/ashtuchkin/iconv-lite). _(UTF-32, not currently covered by the current release of iconv-lite, is also supported.)_
HTTP(S) redirects are automatically handled using [follow-redirects](https://github.com/follow-redirects/follow-redirects), and a wide variety of character encodings are supported using [iconv-lite](https://github.com/ashtuchkin/iconv-lite).

HTTP(S) responses which are compressed using the `gzip`, `deflate` or `br` methods are automatically decompressed. (When using `requestBinary()` or `requestFile()`, this automatic decompression can be disabled.)

Expand Down
9 changes: 9 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Change Log

## 1.1.2

* Added auto-detection of UTF-7 via BOM.
* My merge request to add UTF-32 support to iconv-lite has been approved, so now this project has gone back to using the official release of iconv-lite.

## 1.1.1

* No feature changes or bug fixes; unit testing was simply updated to use mocha and chai instead of jasmine.

## 1.1.0

* Added support for UTF-32 encoding.
Expand Down
33 changes: 21 additions & 12 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "by-request",
"version": "1.1.1",
"version": "1.1.2",
"description": "Simple Node http client for use with promises, async/await.",
"main": "dist/index.js",
"typings": "dist/index",
Expand Down Expand Up @@ -40,7 +40,7 @@
"dependencies": {
"follow-redirects": "^1.7.0",
"http-status-codes": "^1.3.2",
"ks-iconv-lite": "^0.5.2",
"iconv-lite": "^0.5.0",
"lodash": "^4.17.11"
},
"devDependencies": {
Expand Down
60 changes: 32 additions & 28 deletions src/by-request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import { RequestOptions } from 'http';
import zlib from 'zlib';
import { http, https } from 'follow-redirects';
import { parse as parseUrl } from 'url';
import iconv from 'ks-iconv-lite';
import iconv from 'iconv-lite';
import { UNSUPPORTED_MEDIA_TYPE } from 'http-status-codes';
import { Writable } from 'stream';

Expand Down Expand Up @@ -104,7 +104,7 @@ export async function request(urlOrOptions: string | ExtendedRequestOptions,
let autodetect = !forceEncoding && !binary;
let bytesRead = 0;
let bomDetected = false;
let bomRemoved = false;
let removeBom = false;

if (!options.dontDecompress || !binary) {
if (contentEncoding === 'gzip') {
Expand Down Expand Up @@ -183,11 +183,9 @@ export async function request(urlOrOptions: string | ExtendedRequestOptions,
}

if (content.length === 0 && !options.ignoreBom) {
const bom = checkBOM(data);

if (bom) {
const [bomLength, bomCharset] = bom.split(':');
const bomCharset = checkBOM(data);

if (bomCharset) {
if (!forceEncoding) {
if (!iconv.encodingExists(bomCharset)) {
reject(UNSUPPORTED_MEDIA_TYPE);
Expand All @@ -201,10 +199,8 @@ export async function request(urlOrOptions: string | ExtendedRequestOptions,

bomDetected = true;

if (!options.keepBom) {
data = data.slice(Number(bomLength));
bomRemoved = true;
}
if (!options.keepBom)
removeBom = true;
}
}

Expand Down Expand Up @@ -242,7 +238,7 @@ export async function request(urlOrOptions: string | ExtendedRequestOptions,
if (options.responseInfo) {
options.responseInfo({
bomDetected,
bomRemoved,
bomRemoved: removeBom,
charset: binary ? 'binary' : charset,
contentEncoding: contentEncoding || 'identity',
contentLength: bytesRead,
Expand All @@ -259,10 +255,19 @@ export async function request(urlOrOptions: string | ExtendedRequestOptions,
}
else if (binary)
resolve(content);
else if (usingIconv)
resolve(iconv.decode(content, charset, options.ignoreBom ? { stripBOM: false } : undefined));
else
resolve(content.toString(charset));
else {
let text: string;

if (usingIconv)
text = iconv.decode(content, charset, { stripBOM: false });
else
text = content.toString(charset);

if (removeBom && text.charCodeAt(0) === 0xFEFF)
text = text.substr(1);

resolve(text);
}
});
}
else {
Expand Down Expand Up @@ -299,15 +304,18 @@ function checkBOM(buffer: Buffer): string {
const bom = Array.from(buffer.slice(0, Math.min(buffer.length, 4)));

if (bom[0] === 0x00 && bom[1] === 0x00 && bom[2] === 0xFE && bom[3] === 0xFF)
return '4:utf-32be';
return 'utf-32be';
else if (bom[0] === 0xFF && bom[1] === 0xFE && bom[2] === 0x00 && bom[3] === 0x00)
return '4:utf-32le';
return 'utf-32le';
else if (bom[0] === 0xFE && bom[1] === 0xFF)
return '2:utf-16be';
return 'utf-16be';
else if (bom[0] === 0xFF && bom[1] === 0xFE)
return '2:utf-16le';
return 'utf-16le';
else if (bom[0] === 0xEF && bom[1] === 0xBB && bom[2] === 0xBF)
return '3:utf8';
return 'utf8';
else if (bom[0] === 0x2B && bom[1] === 0x2F && bom[2] === 0x76 &&
(bom[3] === 0x2B || bom[3] === 0x2F || bom[3] === 0x38 || bom[3] === 0x39))
return 'utf7';

return null;
}
Expand All @@ -316,18 +324,14 @@ function lookForEmbeddedEncoding(buffer: Buffer): string {
// First make sure this isn't likely to be a 16- or 32-bit encoding.
const start = Array.from(buffer.slice(0, Math.min(buffer.length, 4)));

if (start[0] === 0 && start[1] === 0 && (start[2] !== 0 || start[3] !== 0)) {
if (start[0] === 0 && start[1] === 0 && (start[2] !== 0 || start[3] !== 0))
return 'utf-32be';
}
else if ((start[0] !== 0 || start[1] !== 0) && start[2] === 0 && start[3] === 0) {
else if ((start[0] !== 0 || start[1] !== 0) && start[2] === 0 && start[3] === 0)
return 'utf-32le';
}
else if (start[0] === 0 && start[1] !== 0) {
else if (start[0] === 0 && start[1] !== 0)
return 'utf-16be';
}
else if (start[0] !== 0 && start[1] === 0) {
else if (start[0] !== 0 && start[1] === 0)
return 'utf-16le';
}

const text = buffer.slice(0, Math.min(buffer.length, MAX_EXAMINE)).toString('ascii').toLowerCase().replace('\n', ' ').trim();
// Strip line breaks and comments first
Expand Down
10 changes: 9 additions & 1 deletion src/request-text.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,15 @@ describe('request-text', () => {

it('should be able to identity various UTF formats according to BOM', async () => {
let responseInfo: ResponseInfo = null;
let content = await requestText(`http://localhost:${port}/test12/?enc=utf8`, {
let content = await requestText(`http://localhost:${port}/test12/?enc=utf7`, {
responseInfo: info => responseInfo = info
});

expect(content).equals(TEST_TEXT_3);
expect(responseInfo.bomRemoved).to.be.true;
expect(responseInfo.charset).equals('utf7');

content = await requestText(`http://localhost:${port}/test12/?enc=utf8`, {
responseInfo: info => responseInfo = info
});

Expand Down
2 changes: 1 addition & 1 deletion src/test-server.spec.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import compression from 'compression';
import express, { Application, Request, Response } from 'express';
import iconv from 'ks-iconv-lite';
import iconv from 'iconv-lite';
import * as zlib from 'zlib';
import { Server } from 'http';

Expand Down

0 comments on commit 4e18f0e

Please sign in to comment.