build(esbuild.config.js): add esbuild configuration file and move build command to it (#82)

feat(package.json): add @dqbd/tiktoken dependency
fix(tokenCount.ts): uncomment Tiktoken import and encoding, and return token length instead of an approximation
This commit is contained in:
Raymond
2023-04-05 12:42:07 +08:00
committed by GitHub
parent 2b10dc089c
commit 9f65c450e3
3 changed files with 27 additions and 11 deletions
+11 -10
View File
@@ -1,14 +1,15 @@
// import { Tiktoken } from '@dqbd/tiktoken/lite';
// import cl100k_base from '@dqbd/tiktoken/encoders/cl100k_base.json' assert { type: 'json' };
import { Tiktoken } from '@dqbd/tiktoken/lite';
import cl100k_base from '@dqbd/tiktoken/encoders/cl100k_base.json' assert { type: 'json' };
/**
 * Counts how many tokens `content` encodes to under the `cl100k_base`
 * encoding.
 *
 * A new `Tiktoken` encoder is built for every call and released before the
 * function returns. This exact count replaces the earlier
 * `content.length / 2.7` approximation.
 *
 * @param content - Text to tokenize.
 * @returns The number of tokens produced by encoding `content`.
 */
export function tokenCount(content: string): number {
  const encoding = new Tiktoken(
    cl100k_base.bpe_ranks,
    cl100k_base.special_tokens,
    cl100k_base.pat_str
  );

  try {
    return encoding.encode(content).length;
  } finally {
    // Release the encoder on every path, including when encode() throws,
    // so a failed call does not leave it allocated.
    encoding.free();
  }
}