llama.cpp PowerShell utils
# Collection of variables, aliases, and functions for working with llama.cpp.
# Dot-source this file to activate it in the current shell.
# HARDCODED VALUES - MAKE SURE TO TUNE THEM FOR YOUR SYSTEM!
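# Example (sketch - assumes this file is saved as llama-utils.ps1 in the current directory):
#   . .\llama-utils.ps1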
$Env:ROCM_VERSION = "6.1.2"
$Env:USE_ROCM = 1
$Env:HIP_PLATFORM = "amd"
$Env:GPU_ARCHS = "gfx1100"
$Env:HSA_OVERRIDE_GFX_VERSION = "11.0.0"
$Env:TF_PYTHON_VERSION = "3.12"
# HIP_PATH should be set by ROCm installer
$Env:ROCM_PATH = $Env:HIP_PATH
$Env:GFX_ARCH = $Env:GPU_ARCHS
$Env:AMDGPU_TARGETS = $Env:GPU_ARCHS
$Env:PYTORCH_ROCM_ARCH = $Env:GPU_ARCHS
$Env:TF_ROCM_AMDGPU_TARGETS = $Env:GPU_ARCHS
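# Sanity check (a small addition, not strictly required): everything above assumes the ROCm/HIP
# SDK installer has populated HIP_PATH; warn early if it has not.
if (-not $Env:HIP_PATH) {
    Write-Warning "HIP_PATH is not set - ROCM_PATH and the ROCm PATH/CMake entries below will be incomplete."
}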
# llama.cpp-related variables (tweak if necessary)
$Env:LLAMA_CPP_PATH = "$Env:USERPROFILE\.llama.cpp"
$Env:LLAMA_CPP_PYTHON_VENV_PATH = "$Env:USERPROFILE\.llama.cpp.venv"
$Env:LLAMA_CPP_SERVER_HOST = "steelph0enix.pc"
$Env:LLAMA_CPP_SERVER_PORT = "51536"
$Env:LLAMA_CPP_SERVER_URL = "http://${Env:LLAMA_CPP_SERVER_HOST}:${Env:LLAMA_CPP_SERVER_PORT}/"
$Env:LLAMA_CPP_SERVER_CTX_SIZE = "20480"
$Env:LLAMA_CPP_SERVER_BATCH_SIZE = 4096
$Env:LLAMA_CPP_SERVER_UBATCH_SIZE = 1024
$Env:LLAMA_CPP_SERVER_GPU_LAYERS = 999
$Env:LLAMA_CPP_SERVER_PRIORITY = 1
$Env:LLAMA_CPP_SERVER_FRONTEND_PATH = "D:\LLMs\llama_server_frontend"
# llama-server's OpenAI-compatible API does not require a real key, so a placeholder is enough
$Env:OPENAI_API_KEY = "no-key-required"
$Env:PATH = "${Env:PATH};${Env:ROCM_PATH}bin;${Env:LLAMA_CPP_PATH}\build\bin"
$Env:PYTHONPATH = "${Env:LLAMA_CPP_PATH}\gguf-py;${Env:PYTHONPATH}"
$Env:CMAKE_MODULE_PATH = "${Env:CMAKE_MODULE_PATH};${Env:ROCM_PATH}cmake"
# System-related variables
$logical_cores_amount = (Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors
# llama.cpp python venv stuff
Function llm-llama-venv-activate {
    . "$Env:LLAMA_CPP_PYTHON_VENV_PATH\Scripts\Activate.ps1"
    Write-Host "llama.cpp python virtual environment activated!"
}
Function llm-llama-venv-update {
    Write-Host "Updating llama.cpp python virtualenv..."
    llm-llama-venv-activate
    python -m pip install --upgrade pip setuptools wheel
    python -m pip install --upgrade torch sentencepiece transformers protobuf
}
Function llm-llama-venv-initialize {
    Write-Host "Initializing python virtualenv for llama.cpp..."
    python -m venv $Env:LLAMA_CPP_PYTHON_VENV_PATH
    llm-llama-venv-update
}
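# First-time setup (usage sketch): create and populate the virtualenv once, then just activate it
# in later sessions:
#   llm-llama-venv-initialize
#   llm-llama-venv-activate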
# generic llm-related functions
Function llm-server($model_path) {
    Write-Host "Running $model_path using llama-server @ $Env:LLAMA_CPP_SERVER_URL w/ $logical_cores_amount CPU cores, $Env:LLAMA_CPP_SERVER_GPU_LAYERS GPU layers, and $Env:LLAMA_CPP_SERVER_CTX_SIZE context size"
    llama-server `
        --threads $logical_cores_amount `
        --prio $Env:LLAMA_CPP_SERVER_PRIORITY `
        --prio-batch $Env:LLAMA_CPP_SERVER_PRIORITY `
        --mlock `
        --gpu-layers $Env:LLAMA_CPP_SERVER_GPU_LAYERS `
        --ctx-size $Env:LLAMA_CPP_SERVER_CTX_SIZE `
        --batch-size $Env:LLAMA_CPP_SERVER_BATCH_SIZE `
        --ubatch-size $Env:LLAMA_CPP_SERVER_UBATCH_SIZE `
        --host $Env:LLAMA_CPP_SERVER_HOST `
        --port $Env:LLAMA_CPP_SERVER_PORT `
        --flash-attn `
        --mirostat 2 `
        --model $model_path `
        --path $Env:LLAMA_CPP_SERVER_FRONTEND_PATH
}
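# Example invocation (sketch - the model path is a placeholder, not part of the original setup):
#   llm-server "D:\LLMs\some-model.Q8_0.gguf"
# Once running, llama-server also serves a /health endpoint and an OpenAI-compatible API under /v1
# at $Env:LLAMA_CPP_SERVER_URL. A minimal (hypothetical) helper to check that the server is up:
Function llm-server-health {
    Invoke-RestMethod -Uri "${Env:LLAMA_CPP_SERVER_URL}health"
}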
Function llm-quantize-model {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$false)]
        [string]$quantization = 'auto'
    )
    $output_file = Split-Path -Path $model_path -Leaf
    Write-Host "Converting model from HF format to GGUF..."
    # Call the converter directly (instead of via Invoke-Expression) so paths containing spaces are handled correctly
    python "$Env:LLAMA_CPP_PATH/convert_hf_to_gguf.py" --outtype $quantization --outfile "$output_file.$quantization.gguf" $model_path
}
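# Example (sketch - the directory is a placeholder for a model downloaded from Hugging Face):
#   llm-quantize-model "D:\LLMs\Meta-Llama-3.1-8B-Instruct" q8_0
# convert_hf_to_gguf.py accepts --outtype values such as f32, f16, bf16, q8_0 and auto (the default used here).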
# llama.cpp management functions
Function llm-llama-clone {
echo "Pulling llama.cpp repository to $Env:LLAMA_CPP_PATH"
git clone git@github.com:ggerganov/llama.cpp.git $Env:LLAMA_CPP_PATH
$og_pwd = Get-Location
cd $Env:LLAMA_CPP_PATH
git submodule update --init --recursive
git lfs pull
cd $og_pwd
}
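# Build sketch (not part of the original function set): the ROCm variables defined at the top of
# this file (GPU_ARCHS, AMDGPU_TARGETS, ROCM_PATH, CMAKE_MODULE_PATH) are what a HIP-enabled CMake
# build of llama.cpp consumes. The function name, the GGML_HIPBLAS option and the Ninja/clang
# selection are assumptions - exact option names depend on the llama.cpp revision and your toolchain.
Function llm-llama-build {
    $og_pwd = Get-Location
    cd $Env:LLAMA_CPP_PATH
    # Configure a Release build with HIP offloading for the GPU architecture selected above;
    # ROCm's clang is expected on PATH (added via ROCM_PATH above), Ninja must be installed separately.
    cmake -B build -G Ninja `
        -DCMAKE_BUILD_TYPE=Release `
        -DCMAKE_C_COMPILER=clang `
        -DCMAKE_CXX_COMPILER=clang++ `
        -DGGML_HIPBLAS=ON `
        -DAMDGPU_TARGETS=$Env:GPU_ARCHS
    cmake --build build --config Release --parallel $logical_cores_amount
    cd $og_pwd
}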