diff --git a/esbuild.config.js b/esbuild.config.js new file mode 100644 index 0000000..42ba5de --- /dev/null +++ b/esbuild.config.js @@ -0,0 +1,14 @@ +import { build } from 'esbuild' +import fs from 'fs' + +await build({ + entryPoints: ['./src/cli.ts'], + bundle: true, + platform: 'node', + format: 'cjs', + outfile: './out/cli.cjs', +}); + +const wasmFile = fs.readFileSync('./node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm') + +fs.writeFileSync('./out/tiktoken_bg.wasm', wasmFile) diff --git a/package.json b/package.json index 0792205..bb34032 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,7 @@ "watch": "npm run -S build -- --sourcemap --watch", "start": "node ./out/cli.cjs", "dev": "ts-node ./src/cli.ts", - "build": "rimraf out && esbuild ./src/cli.ts --bundle --outfile=out/cli.cjs --format=cjs --platform=node", + "build": "rimraf out && node esbuild.config.js", "deploy": "npm run build && npm version patch && npm publish --tag latest", "lint": "eslint src --ext ts && tsc --noEmit", "format": "prettier --write src" @@ -60,6 +60,7 @@ }, "dependencies": { "@clack/prompts": "^0.6.1", + "@dqbd/tiktoken": "^1.0.2", "axios": "^1.3.4", "chalk": "^5.2.0", "cleye": "^1.3.2", diff --git a/src/utils/tokenCount.ts b/src/utils/tokenCount.ts index fafbf8f..45eec64 100644 --- a/src/utils/tokenCount.ts +++ b/src/utils/tokenCount.ts @@ -1,14 +1,15 @@ -// import { Tiktoken } from '@dqbd/tiktoken/lite'; -// import cl100k_base from '@dqbd/tiktoken/encoders/cl100k_base.json' assert { type: 'json' }; +import { Tiktoken } from '@dqbd/tiktoken/lite'; +import cl100k_base from '@dqbd/tiktoken/encoders/cl100k_base.json' assert { type: 'json' }; export function tokenCount(content: string): number { - // const encoding = new Tiktoken( - // cl100k_base.bpe_ranks, - // cl100k_base.special_tokens, - // cl100k_base.pat_str - // ); - // const tokens = encoding.encode(content); - // encoding.free(); + const encoding = new Tiktoken( + cl100k_base.bpe_ranks, + cl100k_base.special_tokens, + cl100k_base.pat_str + ); + const tokens = encoding.encode(content); + encoding.free(); + return tokens.length; - return content.length / 2.7; + //return content.length / 2.7; }