Skip to content

Commit

Permalink
Replace Buffer with Uint8Array
Browse files Browse the repository at this point in the history
  • Loading branch information
bjornstar committed Jun 27, 2024
1 parent 4a00de1 commit 045717d
Show file tree
Hide file tree
Showing 16 changed files with 88 additions and 170 deletions.
32 changes: 18 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

# token-types

A primitive token library used to read and write from a node `Buffer`.
Although it is possible to use this module directly, it is primary designed to be used with [strtok3 tokenizer](https://github.com/Borewit/strtok3).
A primitive token library used to read and write from a `Uint8Array`.
Although it is possible to use this module directly, it is primarily designed to be used with [strtok3 tokenizer](https://github.com/Borewit/strtok3).

## Compatibility

Expand Down Expand Up @@ -41,7 +41,7 @@ npm install --save-dev @tokenizer/token
```js
import * as strtok3 from 'strtok3';
import * as token from 'token-types';

(async () => {

const tokenizer = await strtok3.fromFile("somefile.bin");
Expand All @@ -50,15 +50,15 @@ import * as token from 'token-types';
console.log(`My number: ${myNumber}`);
} finally {
tokenizer.close(); // Close the file
}
}
})();
```

## Tokens

### Numeric tokens

`node-strtok` supports a wide variety of numerical tokens out of the box:
`token-types` supports a wide variety of numeric tokens out of the box:

| Token | Number | Bits | Endianness |
|---------------|------------------|------|----------------|
Expand Down Expand Up @@ -89,19 +89,23 @@ import * as token from 'token-types';
| `Float80_BE`* | IEEE 754 float | 80 | big endian |
| `Float80_LE`* | IEEE 754 float | 80 | little endian |

### Other tokens
(*) These tokens exceed the precision of JavaScript's IEEE 754 64-bit floating-point numbers; decoding and encoding are done on a best-effort basis.

### String tokens

StringType decoding is implemented using TextDecoder which supports a large number of encodings including but not limited to:

* UTF-8 (the default)
* Windows-1252
* ISO-8859-1

String types:
* Windows-1252
* ISO-8859-1

*) The tokens exceed the JavaScript IEEE 754 64-bit Floating Point precision, decoding and encoding is best effort based.
Check out [the MDN web docs for the TextDecoder](https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/encoding) for a complete list.

### Custom token
### Custom tokens

Complex tokens can be added, which makes very suitable for reading binary files or network messages:
Custom tokens can be added, suitable for reading binary files or network messages:
```js
ExtendedHeader = {
ExtendedHeader = {
len: 10,

get: (buf, off) => {
Expand Down
71 changes: 10 additions & 61 deletions lib/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import * as ieee754 from 'ieee754';
import { IToken, IGetToken } from '@tokenizer/token';
import { Buffer } from 'node:buffer';
import type { IToken, IGetToken } from '@tokenizer/token';

// Primitive types

Expand Down Expand Up @@ -429,26 +428,18 @@ export class Uint8ArrayType implements IGetToken<Uint8Array> {
}
}

/**
 * Consume a fixed number of bytes from the source array and return them as a Node.js `Buffer`.
 * The returned `Buffer` is built with `Buffer.from` over the selected sub-range, so it holds
 * its own copy of the bytes; writing to it does not modify the source array.
 */
export class BufferType implements IGetToken<Uint8Array, Buffer> {

/**
 * @param len - number of bytes to read
 */
public constructor(public len: number) {
}

/**
 * @param uint8Array - source bytes
 * @param off - offset in `uint8Array` of the first byte to copy
 * @returns a new `Buffer` holding the bytes from `off` up to (not including) `off + len`
 */
public get(uint8Array: Uint8Array, off: number): Buffer {
return Buffer.from(uint8Array.subarray(off, off + this.len));
}
}

/**
* Consume a fixed number of bytes from the stream and return a string with a specified encoding.
*/
export class StringType implements IGetToken<string, Buffer> {
export class StringType implements IGetToken<string> {
private textDecoder: TextDecoder;

public constructor(public len: number, public encoding: BufferEncoding) {
public constructor(public len: number, public encoding: string) {
this.textDecoder = new TextDecoder(encoding);
}

public get(uint8Array: Uint8Array, offset: number): string {
return Buffer.from(uint8Array).toString(this.encoding, offset, offset + this.len);
return this.textDecoder.decode(uint8Array.subarray(offset, offset + this.len));
}
}

Expand All @@ -457,55 +448,13 @@ export class StringType implements IGetToken<string, Buffer> {
* Using windows-1252 / ISO 8859-1 decoding
*/
export class AnsiStringType implements IGetToken<string> {

private static windows1252 = [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352,
8249, 338, 141, 381, 143, 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732,
8482, 353, 8250, 339, 157, 382, 376, 160, 161, 162, 163, 164, 165, 166, 167, 168,
169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184,
185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216,
217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232,
233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247,
248, 249, 250, 251, 252, 253, 254, 255];

private static decode(buffer: Uint8Array, offset: number, until: number): string {
let str = '';
for (let i = offset; i < until; ++i) {
str += AnsiStringType.codePointToString(AnsiStringType.singleByteDecoder(buffer[i]));
}
return str;
}

private static inRange(a: number, min: number, max: number): boolean {
return min <= a && a <= max;
}

private static codePointToString(cp: number): string {
if (cp <= 0xFFFF) {
return String.fromCharCode(cp);
} else {
cp -= 0x10000;
return String.fromCharCode((cp >> 10) + 0xD800, (cp & 0x3FF) + 0xDC00);
}
}

private static singleByteDecoder(bite: number): number {
if (AnsiStringType.inRange(bite, 0x00, 0x7F)) {
return bite;
}

const codePoint = AnsiStringType.windows1252[bite - 0x80];
if (codePoint === null) {
throw Error('invaliding encoding');
}

return codePoint;
}
private textDecoder: TextDecoder;

public constructor(public len: number) {
this.textDecoder = new TextDecoder('windows-1252');
}

public get(buffer: Buffer, offset: number = 0): string {
return AnsiStringType.decode(buffer, offset, offset + this.len);
public get(uint8Array: Uint8Array, offset: number = 0): string {
return this.textDecoder.decode(uint8Array.subarray(offset, offset + this.len));
}
}
22 changes: 1 addition & 21 deletions test/test-classes.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,5 @@
import { assert } from 'chai';
import { Buffer } from 'node:buffer';
import { BufferType, StringType, Uint8ArrayType } from '../lib/index.js';

describe('BufferType', () => {

it('Should copy data fom the source array', () => {

const source = Buffer.from([0xa1, 0xa2, 0xb1, 0xb2, 0xc1, 0xc2]);

const bufferTypeToken = new BufferType(2);
const bufferResult = bufferTypeToken.get(source, 2);

assert.deepStrictEqual(bufferResult, Buffer.from([0xb1, 0xb2]), 'should be 2 middle bytes: 0xb1, 0xb2');

// Overwrite the result
bufferResult[0] = 0xd1;
bufferResult[1] = 0xd2;

assert.notDeepEqual(source, Buffer.from([0xa1, 0xa2, 0xd1, 0xd2, 0xc1, 0xc2]), 'should copy the data');
});
});
import { StringType, Uint8ArrayType } from '../lib/index.js';

describe('Uint8ArrayType', () => {

Expand Down
33 changes: 16 additions & 17 deletions test/test-ieee-754-floats.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Test writing and reading IEEE 754 floating-point values.

import { assert } from 'chai';
import { Buffer } from 'node:buffer';
import * as Token from '../lib/index.js';
import * as util from './util.js';

Expand All @@ -13,7 +12,7 @@ describe('IEEE 754 floats', () => {

it('should encode', () => {

const buf = Buffer.alloc(2);
const buf = new Uint8Array(2);

Token.Float16_BE.put(buf, 0, 0.0);
util.checkBuffer(buf, '0000');
Expand All @@ -26,7 +25,7 @@ describe('IEEE 754 floats', () => {
});

it('should decode', () => {
const buf = Buffer.from('\x55\x52', 'binary');
const buf = new Uint8Array([0x55, 0x52]);
assert.strictEqual(Token.Float16_BE.get(buf, 0), 85.125);
});

Expand All @@ -36,7 +35,7 @@ describe('IEEE 754 floats', () => {

it('should encode', () => {

const buf = Buffer.alloc(2);
const buf = new Uint8Array(2);

Token.Float16_LE.put(buf, 0, 0.0);
util.checkBuffer(buf, '0000');
Expand All @@ -49,7 +48,7 @@ describe('IEEE 754 floats', () => {
});

it('should decode', () => {
const buf = Buffer.from('\x52\x55', 'binary');
const buf = new Uint8Array([0x52, 0x55]);
assert.strictEqual(Token.Float16_LE.get(buf, 0), 85.125);
});

Expand All @@ -62,7 +61,7 @@ describe('IEEE 754 floats', () => {

it('should encode', () => {

const buffer = Buffer.alloc(4);
const buffer = new Uint8Array(4);

Token.Float32_BE.put(buffer, 0, 0.0);
util.checkBuffer(buffer, '00000000');
Expand All @@ -75,7 +74,7 @@ describe('IEEE 754 floats', () => {
});

it('should decode', () => {
const buf = Buffer.from('\x42\xAA\x40\x00', 'binary');
const buf = new Uint8Array([0x42, 0xAA, 0x40, 0x00]);
assert.strictEqual(Token.Float32_BE.get(buf, 0), 85.125);
});

Expand All @@ -85,7 +84,7 @@ describe('IEEE 754 floats', () => {

it('should encode', () => {

const buf = Buffer.alloc(4);
const buf = new Uint8Array(4);

Token.Float32_LE.put(buf, 0, 0.0);
util.checkBuffer(buf, '00000000');
Expand All @@ -98,7 +97,7 @@ describe('IEEE 754 floats', () => {
});

it('should decode', () => {
const buf = Buffer.from('\x00\x40\xAA\x42', 'binary');
const buf = new Uint8Array([0x00, 0x40, 0xAA, 0x42]);
assert.strictEqual(Token.Float32_LE.get(buf, 0), 85.125);
});

Expand All @@ -111,7 +110,7 @@ describe('IEEE 754 floats', () => {

it('should encode', () => {

const buf = Buffer.alloc(8);
const buf = new Uint8Array(8);

Token.Float64_BE.put(buf, 0, 0.0);
util.checkBuffer(buf, '0000000000000000');
Expand All @@ -124,7 +123,7 @@ describe('IEEE 754 floats', () => {
});

it('should decode', () => {
const buf = Buffer.from('\x40\x55\x48\x00\x00\x00\x00\x00', 'binary');
const buf = new Uint8Array([0x40, 0x55, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00]);
assert.strictEqual(Token.Float64_BE.get(buf, 0), 85.125);
});

Expand All @@ -134,7 +133,7 @@ describe('IEEE 754 floats', () => {

it('should encode', () => {

const buf = Buffer.alloc(8);
const buf = new Uint8Array(8);

Token.Float64_LE.put(buf, 0, 0.0);
util.checkBuffer(buf, '0000000000000000');
Expand All @@ -147,7 +146,7 @@ describe('IEEE 754 floats', () => {
});

it('should decode', () => {
const buf = Buffer.from('\x00\x00\x00\x00\x00\x48\x55\x40', 'binary');
const buf = new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x55, 0x40]);
assert.strictEqual(Token.Float64_LE.get(buf, 0), 85.125);
});

Expand All @@ -160,7 +159,7 @@ describe('IEEE 754 floats', () => {

it('should encode', () => {

const buf = Buffer.alloc(10);
const buf = new Uint8Array(10);

Token.Float80_BE.put(buf, 0, 0.0);
util.checkBuffer(buf, '00000000000000000000');
Expand All @@ -173,7 +172,7 @@ describe('IEEE 754 floats', () => {
});

it('should decode', () => {
const buf = Buffer.from('\x40\x02\xAA\x40\x00\x00\x00\x00\x00\x00\x00\x00', 'binary');
const buf = new Uint8Array([0x40, 0x02, 0xAA, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
assert.strictEqual(Token.Float80_BE.get(buf, 0), 85.125);
});

Expand All @@ -183,7 +182,7 @@ describe('IEEE 754 floats', () => {

it('should encode', () => {

const buf = Buffer.alloc(10);
const buf = new Uint8Array(10);

Token.Float80_LE.put(buf, 0, 0.0);
util.checkBuffer(buf, '00000000000000000000');
Expand All @@ -196,7 +195,7 @@ describe('IEEE 754 floats', () => {
});

it.skip('should decode', () => {
const buf = Buffer.from('\x00\x00\x00\x00\x00\x00\x00\x00\x40\xAA\x02\x40', 'binary');
const buf = new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0xAA, 0x02, 0x40]);
assert.strictEqual(Token.Float80_LE.get(buf, 0), 85.125);
});

Expand Down
9 changes: 4 additions & 5 deletions test/test-int16.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Test writing and reading int16 values.

import { assert } from 'chai';
import { Buffer } from 'node:buffer';
import * as Token from '../lib/index.js';
import * as util from './util.js';

Expand All @@ -11,7 +10,7 @@ describe('Parse 16-bit signed integer', () => {

it('should encode', () => {

const buf = Buffer.alloc(2);
const buf = new Uint8Array(2);

Token.INT16_BE.put(buf, 0, 0x00);
util.checkBuffer(buf, '0000');
Expand All @@ -25,7 +24,7 @@ describe('Parse 16-bit signed integer', () => {

it('should decode', () => {

const buf = Buffer.from('\x0a\x1a\x00\x00\xff\xff\x80\x00', 'binary');
const buf = new Uint8Array([0x0a, 0x1a, 0x00, 0x00, 0xff, 0xff, 0x80, 0x00]);

assert.equal(Token.INT16_BE.get(buf, 0), 2586);
assert.equal(Token.INT16_BE.get(buf, 2), 0);
Expand All @@ -39,7 +38,7 @@ describe('Parse 16-bit signed integer', () => {

it('should encode', () => {

const buf = Buffer.alloc(2);
const buf = new Uint8Array(2);

Token.INT16_LE.put(buf, 0, 0x00);
util.checkBuffer(buf, '0000');
Expand All @@ -53,7 +52,7 @@ describe('Parse 16-bit signed integer', () => {

it('should decode', () => {

const buf = Buffer.from('\x1a\x0a\x00\x00\xff\xff\x00\x80', 'binary');
const buf = new Uint8Array([0x1a, 0x0a, 0x00, 0x00, 0xff, 0xff, 0x00, 0x80]);

assert.equal(Token.INT16_LE.get(buf, 0), 2586);
assert.equal(Token.INT16_LE.get(buf, 2), 0);
Expand Down
Loading

0 comments on commit 045717d

Please sign in to comment.