Quick and dirty script to measure how LLaMA.cpp's inference performance has evolved across its commit history.
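The script is meant to be run from the root of a llama.cpp checkout, since it invokes git, make and ./main in the current directory, with the three quantized 7B model files listed in FORMATS below already present under ./models/7B/. Assuming it is saved as bench.mjs (the filename is only illustrative; anything Node treats as an ES module works):

node bench.mjs

Timings and peak memory usage are appended to ./result.csv, one row per benchmarked commit.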
/*!
 * BSD Zero Clause License
 * Copyright (c) Cynthia Rey
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */
import { spawn, execSync } from 'child_process'
import { open } from 'fs/promises'
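
// Inference arguments passed to ./main on every run: a fixed prompt, thread
// count, generation length, context size and sampling settings, so timings
// stay comparable from one commit to the next.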
const LLAMA_PARAMETERS = [
  '-t', '4',
  '-p', 'Here is a long story about how programming came to be:',
  '-n', '256',
  '-c', '1024',
  '--top_k', '40',
  '--top_p', '0.95',
  '--repeat_last_n', '64',
  '--repeat_penalty', '1.1',
]
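
// Commit boundaries: benchmarking starts at LLAMA_START_AT, and the on-disk
// model format changed at LLAMA_GGMF_SINCE (ggmf) and LLAMA_GGJT_SINCE (ggjt),
// so each commit must be fed a model file in the format it understands.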
const LLAMA_START_AT = '2d64715ad475f192a4004a52d134c67ccb6f44ad'
const LLAMA_GGMF_SINCE = '074bea2eb1f1349a0118239c4152914aecaa1be4'
const LLAMA_GGJT_SINCE = '78ca9838ee36660a776e97e3391b6fb5dcaacf7f'
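
// Only commits touching these files get benchmarked; other commits are
// unlikely to affect inference performance.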
const IMPORTANT_FILES = [
  'ggml.c', 'ggml.h',
  'llama.cpp', 'llama.h',
  'examples/common.cpp', 'examples/common.h',
  'examples/main/main.cpp',
]
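
// One quantized 7B model per on-disk format; all three files must be present
// before the script is started.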
const FORMATS = {
  unversioned: './models/7B/ggml-model-q4_0.unversioned.bin',
  ggmf: './models/7B/ggml-model-q4_0.ggmf.bin',
  ggjt: './models/7B/ggml-model-q4_0.bin',
}
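
// Results are collected in a CSV file, one row per (commit, format) run.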
const csv = await open('./result.csv', 'w')
await csv.appendFile('commit,format,load time,sample time,prompt eval time,eval time,total time,eval token time,max memory rss (KB)\n')
function getMemoryUsageOf (pid) {
  // `ps -o rss` prints a header line followed by the RSS in KB; take the
  // second whitespace-separated token rather than relying on a fixed byte offset.
  return Number(execSync(`ps -p ${pid} -o rss`).toString().trim().split(/\s+/)[1])
}
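
// Benchmark a single commit: spawn ./main with the right model file, sample
// peak RSS while it runs, enforce a timeout, then parse the timings that
// llama.cpp prints to stderr and append them as a CSV row.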
async function runLlama (commit, format) {
  return new Promise((resolve) => {
    const llama = spawn('./main', [ ...LLAMA_PARAMETERS, '-m', FORMATS[format] ])

    // Track peak memory usage by polling `ps` every 5 seconds.
    // Ugly way of doing it but I cba to make it better :D
    let usedMemory = 0
    const memCheck = setInterval(() => {
      try {
        usedMemory = Math.max(usedMemory, getMemoryUsageOf(llama.pid))
      } catch {
        // The process may have exited between polls; the `exit` handler cleans up.
      }
    }, 5e3)

    // Abort runs taking longer than 5 minutes and mark them as DNF.
    let dnf = false
    const to = setTimeout(() => {
      dnf = true
      llama.kill('SIGKILL')
      console.log('Warn: DNF (timeout)')
    }, 300e3)

    const stderrChunks = []
    llama.stderr.on('data', (d) => stderrChunks.push(d))
    llama.on('exit', () => {
      clearInterval(memCheck)
      clearTimeout(to)

      // DNF; go next
      if (dnf) return resolve()

      // Parse the timings llama.cpp prints to stderr. The per-token eval time
      // regex accounts for both output formats used across the commit range.
      const stderr = Buffer.concat(stderrChunks).toString()
      const timings = stderr.matchAll(/(?:llama_print_timings|main):.*?time =\s+([\d.]+)/g)
      const tokTime = stderr.match(/eval time.*?\(\s+([\d.]+) ms per run|\/\s+([\d.]+) ms/)
      const loadTime = timings.next().value[1]
      const sampleTime = timings.next().value[1]
      // Older commits do not report prompt eval time separately.
      const promptEvalTime = stderr.includes('prompt eval time') ? timings.next().value[1] : ''
      const evalTime = timings.next().value[1]
      const totalTime = timings.next().value[1]
      // Leave the column blank if neither per-token pattern matched.
      const tokenTime = tokTime?.[1] ?? tokTime?.[2] ?? ''

      csv.appendFile(`${commit},${format},${loadTime},${sampleTime},${promptEvalTime},${evalTime},${totalTime},${tokenTime},${usedMemory}\n`)
      resolve()
    })
  })
}
function checkoutLlama (commit) {
  // Clean before checking out: object files left over from a previous commit
  // could otherwise leak into the next build.
  execSync('make clean', { stdio: 'ignore' })
  execSync(`git checkout ${commit}`, { stdio: 'ignore' })
  execSync('make -j main', { stdio: 'ignore' })
}
function getCommits () {
  // This function excludes commits which did not modify llama-related files.
  // It's not made in the most efficient way, but whatever :D
  const allCommits = execSync('git log --pretty=format:"%H"').toString().split('\n')
  const importantCommits = new Set()
  for (const file of IMPORTANT_FILES) {
    execSync(`git log --pretty=format:"%H" --follow '${file}'`).toString().split('\n')
      .forEach((c) => importantCommits.add(c))
  }

  // git log lists newest first; keep everything from HEAD down to
  // LLAMA_START_AT inclusive, then reverse into chronological order.
  return allCommits.slice(0, allCommits.indexOf(LLAMA_START_AT) + 1).reverse()
    .filter((c) => importantCommits.has(c))
}
const commits = getCommits()
const ggmfChangeIdx = commits.indexOf(LLAMA_GGMF_SINCE)
const ggjtChangeIdx = commits.indexOf(LLAMA_GGJT_SINCE)

for (let i = 0; i < commits.length; i++) {
  const commit = commits[i]
  // Pick the model format in use at this point of the history.
  const format = i < ggmfChangeIdx
    ? 'unversioned'
    : i < ggjtChangeIdx
      ? 'ggmf'
      : 'ggjt'

  console.log('Processing commit %s (%d/%d)', commit, i + 1, commits.length)
  try {
    checkoutLlama(commit)
  } catch {
    console.log('Warn: DNF (compile error)')
    continue
  }

  try {
    await runLlama(commit, format)
  } catch {
    console.log('Warn: DNF (runtime error)')
    continue
  }
}

// Close the CSV file handle once all commits have been processed.
await csv.close()