Skip to content

Commit

Permalink
Merge pull request #1 from paradite/feat/token-counter
Browse files Browse the repository at this point in the history
Feat/token counter
  • Loading branch information
paradite authored Jul 21, 2024
2 parents 2120f9a + 59bd0d9 commit 139722d
Show file tree
Hide file tree
Showing 11 changed files with 753 additions and 123 deletions.
56 changes: 27 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ $ yarn add llm-info
## Usage

```ts
// Models
import { AllModels, ModelEnum, NonModelEnum, ModelInfoMap } from 'llm-info';

console.log(AllModels);
Expand All @@ -48,45 +49,42 @@ console.log(AllModelLikes);
]
*/

// Model Info
const modelInfo = ModelInfoMap['gpt-4o'];
console.log(modelInfo);
/*
{
  name: 'GPT-4o',
  provider: 'openai',
  contextWindowTokenLimit: 128000,
  outputTokenLimit: 4096,
  pricePerMillionInputTokens: 5,
  pricePerMillionOutputTokens: 15,
  tokenizerId: 'Xenova/gpt-4o'
}
*/

// Tokenizer
import { AutoTokenizer } from '@xenova/transformers';
const testSentence =
  "Many words map to one token, but some don't: indivisible.";
const results: string[] = [];
for (let i = 0; i < AllModels.length; i++) {
  const model = AllModels[i];
  if (ModelInfoMap[model].tokenizerId) {
    const tokenizer = await AutoTokenizer.from_pretrained(
      ModelInfoMap[model].tokenizerId
    );
    const tokens = tokenizer.encode(testSentence);
    results.push(`${model}: ${tokens.length}`);
  }
}

const modelLike = NonModelEnum['chatgpt'];
const modelLikeInfo = ModelInfoMap[modelLike];
console.log(modelLikeInfo);
/*
{
name: 'ChatGPT',
provider: 'openai',
contextWindowTokenLimit: 4096,
outputTokenLimit: 4096,
pricePerMillionInputTokens: null,
pricePerMillionOutputTokens: null
}
*/
console.log(`Test sentence: ${testSentence}\n${results.join('\n')}`);
// Test sentence: Many words map to one token, but some don't: indivisible.
// gpt-4: 15
// gpt-4o: 14
// gpt-4o-mini: 14
// claude-3-5-sonnet-20240620: 16
```

## Testing
Expand Down
7 changes: 0 additions & 7 deletions jest.config.js

This file was deleted.

24 changes: 24 additions & 0 deletions jest.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// jest.config.ts
import type { JestConfigWithTsJest } from 'ts-jest';

// ESM setup follows https://kulshekhar.github.io/ts-jest/docs/guides/esm-support

/** Jest configuration: run the TypeScript test suite as native ES modules via ts-jest. */
const jestConfig: JestConfigWithTsJest = {
  // Treat .ts sources as ESM instead of CommonJS.
  extensionsToTreatAsEsm: ['.ts'],
  // Strip the ".js" suffix from relative imports so they resolve to .ts sources.
  moduleNameMapper: { '^(\\.{1,2}/.*)\\.js$': '$1' },
  // Compile .ts/.tsx files with ts-jest in ESM mode.
  transform: {
    '^.+\\.tsx?$': ['ts-jest', { useESM: true }],
  },
};

export default jestConfig;
7 changes: 5 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
{
"name": "llm-info",
"version": "1.0.4",
"description": "Information on LLM models, context window token limit, output token limit, pricing and more",
"main": "dist/index.js",
"type": "module",
"module": "./dist/index.mjs",
"types": "./dist/index.d.ts",
"exports": {
Expand All @@ -15,7 +16,7 @@
"scripts": {
"compile": "tsup src/index.ts --dts --format esm,cjs",
"prepublishOnly": "rm -rf ./dist && npm run compile",
"test": "NODE_OPTIONS=\"$NODE_OPTIONS --experimental-vm-modules\" jest"
},
"author": "paradite",
"license": "MIT",
Expand All @@ -31,8 +32,10 @@
],
"devDependencies": {
"@types/jest": "^29.5.12",
"@xenova/transformers": "^2.17.2",
"jest": "^29.7.0",
"ts-jest": "^29.2.3",
"ts-node": "^10.9.2",
"tsup": "^8.2.0",
"typescript": "^5.5.3"
}
Expand Down
78 changes: 3 additions & 75 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,75 +1,3 @@
export enum ModelEnum {
'gpt-4' = 'gpt-4',
'gpt-4o' = 'gpt-4o',
'gpt-4o-mini' = 'gpt-4o-mini',
'claude-3-5-sonnet-20240620' = 'claude-3-5-sonnet-20240620',
}

export enum NonModelEnum {
'chatgpt' = 'chatgpt',
}

export const AllModels = Object.values(ModelEnum);

export const AllModelLikes = [...AllModels, ...Object.values(NonModelEnum)];

export type ModelLike = ModelEnum | NonModelEnum;

export const AI_PROVIDERS = {
OPENAI: 'openai',
ANTHROPIC: 'anthropic',
} as const;

export type AI_PROVIDER_TYPE = (typeof AI_PROVIDERS)[keyof typeof AI_PROVIDERS];

export type ModelInfo = {
name: string;
provider: AI_PROVIDER_TYPE;
contextWindowTokenLimit: number;
outputTokenLimit: number;
pricePerMillionInputTokens: number | null;
pricePerMillionOutputTokens: number | null;
};

export const ModelInfoMap: Record<ModelLike, ModelInfo> = {
[ModelEnum['gpt-4']]: {
name: 'GPT-4',
provider: AI_PROVIDERS.OPENAI,
contextWindowTokenLimit: 128000,
outputTokenLimit: 4096,
pricePerMillionInputTokens: 30,
pricePerMillionOutputTokens: 60,
},
[ModelEnum['gpt-4o']]: {
name: 'GPT-4o',
provider: AI_PROVIDERS.OPENAI,
contextWindowTokenLimit: 128000,
outputTokenLimit: 4096,
pricePerMillionInputTokens: 5,
pricePerMillionOutputTokens: 15,
},
[ModelEnum['gpt-4o-mini']]: {
name: 'GPT-4o mini',
provider: AI_PROVIDERS.OPENAI,
contextWindowTokenLimit: 128000,
outputTokenLimit: 4096,
pricePerMillionInputTokens: 0.15,
pricePerMillionOutputTokens: 0.6,
},
[ModelEnum['claude-3-5-sonnet-20240620']]: {
name: 'Claude 3.5 Sonnet',
provider: AI_PROVIDERS.ANTHROPIC,
contextWindowTokenLimit: 200000,
outputTokenLimit: 4096,
pricePerMillionInputTokens: 3,
pricePerMillionOutputTokens: 15,
},
[NonModelEnum['chatgpt']]: {
name: 'ChatGPT',
provider: AI_PROVIDERS.OPENAI,
contextWindowTokenLimit: 4096,
outputTokenLimit: 4096,
pricePerMillionInputTokens: null,
pricePerMillionOutputTokens: null,
},
};
// Barrel file: the package's public API, re-exported from focused modules.
export * from './model';
export * from './provider';
export * from './modelInfo';
16 changes: 16 additions & 0 deletions src/model.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Identifiers for concrete LLM models; each enum value equals the provider's
// API model id, so the enum can be used directly in API requests.
export enum ModelEnum {
  'gpt-4' = 'gpt-4',
  'gpt-4o' = 'gpt-4o',
  'gpt-4o-mini' = 'gpt-4o-mini',
  'claude-3-5-sonnet-20240620' = 'claude-3-5-sonnet-20240620',
}

// Model-like products that are not a single concrete API model
// (no fixed model id, pricing, or tokenizer).
export enum NonModelEnum {
  'chatgpt' = 'chatgpt',
}

// Every concrete model, in declaration order.
export const AllModels = Object.values(ModelEnum);

// Concrete models plus non-model entries — the full key set of ModelInfoMap.
export const AllModelLikes = [...AllModels, ...Object.values(NonModelEnum)];

// Union accepted anywhere either a model or a model-like product is valid.
export type ModelLike = ModelEnum | NonModelEnum;
60 changes: 60 additions & 0 deletions src/modelInfo.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { ModelEnum, ModelLike, NonModelEnum } from './model';
import { AI_PROVIDER_TYPE, AI_PROVIDERS } from './provider';

/**
 * Static metadata describing a model (or model-like product).
 *
 * Prices are USD per million tokens; null when the entry has no public
 * per-token price (e.g. the ChatGPT product entry).
 * tokenizerId is a Hugging Face repo id loadable via
 * `AutoTokenizer.from_pretrained` from `@xenova/transformers`;
 * null when no compatible tokenizer is available.
 */
export type ModelInfo = {
  name: string;
  provider: AI_PROVIDER_TYPE;
  contextWindowTokenLimit: number;
  outputTokenLimit: number;
  pricePerMillionInputTokens: number | null;
  pricePerMillionOutputTokens: number | null;
  tokenizerId: string | null;
};

// Metadata table covering every ModelLike key.
// NOTE(review): limits and prices presumably reflect provider-published
// figures circa July 2024 — verify against current OpenAI/Anthropic docs.
export const ModelInfoMap: Record<ModelLike, ModelInfo> = {
  [ModelEnum['gpt-4']]: {
    name: 'GPT-4',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: 30,
    pricePerMillionOutputTokens: 60,
    tokenizerId: 'Xenova/gpt-4',
  },
  [ModelEnum['gpt-4o']]: {
    name: 'GPT-4o',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: 5,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/gpt-4o',
  },
  [ModelEnum['gpt-4o-mini']]: {
    name: 'GPT-4o mini',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: 0.15,
    pricePerMillionOutputTokens: 0.6,
    // gpt-4o-mini reuses the gpt-4o tokenizer repo.
    tokenizerId: 'Xenova/gpt-4o',
  },
  [ModelEnum['claude-3-5-sonnet-20240620']]: {
    name: 'Claude 3.5 Sonnet',
    provider: AI_PROVIDERS.ANTHROPIC,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: 3,
    pricePerMillionOutputTokens: 15,
    // NOTE(review): community tokenizer, not an official Anthropic release —
    // counts are approximate.
    tokenizerId: 'Xenova/claude-tokenizer',
  },
  // Product entry, not a concrete API model: no pricing, no tokenizer.
  [NonModelEnum['chatgpt']]: {
    name: 'ChatGPT',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 4096,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: null,
    pricePerMillionOutputTokens: null,
    tokenizerId: null,
  },
};
6 changes: 6 additions & 0 deletions src/provider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Identifiers for the API providers whose models this package describes.
export const AI_PROVIDERS = {
  OPENAI: 'openai',
  ANTHROPIC: 'anthropic',
} as const;

// Union of provider id strings derived from the table above: 'openai' | 'anthropic'.
export type AI_PROVIDER_TYPE = (typeof AI_PROVIDERS)[keyof typeof AI_PROVIDERS];
5 changes: 2 additions & 3 deletions test/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@ describe('llm', () => {
]);
});
it('info works', () => {
const model = ModelEnum['gpt-4'];
const modelInfo = ModelInfoMap[model];
const modelInfo = ModelInfoMap['gpt-4o'];
console.log(modelInfo);
expect(modelInfo.name).toBe('GPT-4');
expect(modelInfo.name).toBe('GPT-4o');

console.log(ModelInfoMap[ModelEnum['claude-3-5-sonnet-20240620']]);

Expand Down
23 changes: 23 additions & 0 deletions test/tokenizer.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { AllModels, ModelInfoMap } from '../src';
import { AutoTokenizer } from '@xenova/transformers';

describe('llm', () => {
  // Loads each model's tokenizer and checks the token count of a fixed
  // sentence stays within the expected range (14-17 across all tokenizers).
  it('tokenizer works', async () => {
    const testSentence =
      "Many words map to one token, but some don't: indivisible.";
    const results: string[] = [];
    // Tokenizers are fetched one at a time; models without a tokenizerId
    // (e.g. the ChatGPT product entry) are skipped.
    for (const model of AllModels) {
      const { tokenizerId } = ModelInfoMap[model];
      if (!tokenizerId) {
        continue;
      }
      const tokenizer = await AutoTokenizer.from_pretrained(tokenizerId);
      const tokenCount = tokenizer.encode(testSentence).length;
      expect(tokenCount).toBeGreaterThanOrEqual(14);
      expect(tokenCount).toBeLessThanOrEqual(17);
      results.push(`${model}: ${tokenCount}`);
    }
    console.log(`Test sentence: ${testSentence}\n${results.join('\n')}`);
  }, 5000);
});
Loading

0 comments on commit 139722d

Please sign in to comment.