Skip to content

Instantly share code, notes, and snippets.

@Mearman
Created November 27, 2023 13:46
Show Gist options
  • Save Mearman/85080f34fe75194c664b3d185f462f0e to your computer and use it in GitHub Desktop.
Save Mearman/85080f34fe75194c664b3d185f462f0e to your computer and use it in GitHub Desktop.
import claude from "@anthropic-ai/tokenizer/claude.json";
import { Tiktoken, TiktokenBPE } from "js-tiktoken";
/**
 * Counts how many tokens `text` encodes to with the tokenizer returned by
 * `getTokenizer()`.
 *
 * The input is normalized to Unicode NFKC before encoding.
 *
 * @param text - The text to tokenize.
 * @returns The number of tokens in the encoded text.
 */
export function countTokens(text: string): number {
  const tokenizer = getTokenizer();
  // Pass "all" as `allowedSpecial`: with the encoder's defaults, input that
  // happens to contain the literal text of a special token is rejected with
  // an exception, which is not useful when merely counting tokens of
  // arbitrary user-supplied text.
  const encoded = tokenizer.encode(text.normalize("NFKC"), "all");
  return encoded.length;
}
/**
 * Creates a `Tiktoken` encoder from the imported `claude.json` data.
 *
 * Only the `pat_str`, `special_tokens` and `bpe_ranks` fields of the JSON are
 * forwarded. A new `Tiktoken` instance is constructed on every call.
 *
 * @returns A freshly constructed `Tiktoken` instance.
 */
export function getTokenizer(): Tiktoken {
  const { pat_str, special_tokens, bpe_ranks } = claude;
  const ranks: TiktokenBPE = { pat_str, special_tokens, bpe_ranks };
  return new Tiktoken(ranks);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment