Skip to content

Instantly share code, notes, and snippets.

@Attunewise
Last active August 5, 2024 03:32
Show Gist options
  • Save Attunewise/f961bb233e09e636016fde225c32f122 to your computer and use it in GitHub Desktop.
Save Attunewise/f961bb233e09e636016fde225c32f122 to your computer and use it in GitHub Desktop.
Counts ChatGPT tokens
// Tokenizer from the `gpt4-tokenizer` package; used below to estimate the
// token count of each message's text.
const { GPT4Tokenizer } = require('gpt4-tokenizer')
// Conversation data loaded from a file next to this script. It is iterated as
// a list of conversations, each exposing `current_node` and `mapping`.
// (Presumably a ChatGPT data export — confirm against the file you feed in.)
const json = require('./conversations.json')
// NOTE(review): the type string 'gpt-3' is passed through as-is — confirm it
// is a value the gpt4-tokenizer constructor actually recognises.
const tokenizer = new GPT4Tokenizer({type:'gpt-3'})
// Flatten every conversation into one list of message-bearing nodes.
// Each conversation is walked from its `current_node` through successive
// `parent` ids until a node has no parent; nodes without a `message` are
// stepped over but not collected.
const nodes = [];
for (const { current_node: startId, mapping } of json) {
  for (let cursor = startId; cursor; cursor = mapping[cursor].parent) {
    const entry = mapping[cursor];
    if (entry.message) {
      nodes.push(entry);
    }
  }
}
/**
 * Formats a Unix timestamp as a `YYYY-MM-DD` string in UTC.
 *
 * @param {number} epoch - Timestamp in seconds (multiplied by 1000 for `Date`).
 * @returns {string} The UTC calendar date; month and day are zero-padded to
 *   two digits, the year is emitted as-is.
 */
function formatEpochToDate(epoch) {
  const moment = new Date(epoch * 1000);
  const twoDigits = (value) => String(value).padStart(2, '0');
  return [
    moment.getUTCFullYear(),
    twoDigits(moment.getUTCMonth() + 1), // getUTCMonth() is zero-based
    twoDigits(moment.getUTCDate()),
  ].join('-');
}
/**
 * Buckets items by the UTC calendar day of `item.message.create_time`.
 * Items whose `message` is falsy are left out of the result.
 *
 * NOTE(review): a null `create_time` coerces to 0 inside formatEpochToDate and
 * therefore lands in a '1970-01-01' bucket — confirm that is intended.
 *
 * @param {Array<object>} items - Nodes, each optionally carrying a `message`.
 * @returns {Array<Array<object>>} One array per distinct day, ordered by the
 *   first appearance of that day in `items`.
 */
function groupItemsByDate(items) {
  const buckets = new Map();
  items.forEach((entry) => {
    if (!entry.message) {
      return;
    }
    const dayKey = formatEpochToDate(entry.message.create_time);
    const sameDay = buckets.get(dayKey);
    if (sameDay) {
      sameDay.push(entry);
    } else {
      buckets.set(dayKey, [entry]);
    }
  });
  return [...buckets.values()];
}
// Order the collected messages by ascending create_time (in place), then
// bucket them into one group per calendar day.
nodes.sort(({ message: left }, { message: right }) => left.create_time - right.create_time);
const groupedItems = groupItemsByDate(nodes);
// Running totals. Messages authored by the 'assistant' role count as output;
// messages from every other role count as input.
let input = 0;
let output = 0;
let inputChars = 0;
let outputChars = 0;
// Largest single-message token estimate seen on each side.
let maxInput = 0;
let maxOutput = 0;
for (const day of groupedItems) {
  for (const node of day) {
    const { content, author } = node.message;
    // Only 'text' content is measured; every other content type is skipped.
    if (content.content_type !== 'text') {
      continue;
    }
    const text = content.parts.join('');
    // Empty messages contribute nothing and are not sent to the tokenizer.
    if (!text) {
      continue;
    }
    const tokens = tokenizer.estimateTokenCount(text);
    if (author.role === 'assistant') {
      outputChars += text.length;
      output += tokens;
      maxOutput = Math.max(maxOutput, tokens);
    } else {
      inputChars += text.length;
      input += tokens;
      maxInput = Math.max(maxInput, tokens);
    }
  }
}
const dayCount = groupedItems.length;
// With no grouped days the averages would be 0/0 = NaN; report 0 instead.
const perDay = (total) => (dayCount === 0 ? 0 : Math.round(total / dayCount));
console.log({days: dayCount, input, output, inputChars, outputChars, maxInput, maxOutput});
console.log("daily tokens", perDay(input), perDay(output));
console.log("daily chars", perDay(inputChars), perDay(outputChars));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment