llama.cpp PowerShell utils
# Collection of variables, aliases and functions to work w/ llama.cpp
# Source to activate.
# HARDCODED VALUES - MAKE SURE TO TUNE THEM FOR YOUR SYSTEM!
$Env:ROCM_VERSION = "6.1.2"
$Env:USE_ROCM = 1
$Env:HIP_PLATFORM = "amd"
$Env:GPU_ARCHS = "gfx1100"
$Env:HSA_OVERRIDE_GFX_VERSION = "11.0.0"
$Env:TF_PYTHON_VERSION = "3.12"
# HIP_PATH should be set by the ROCm installer (with a trailing backslash,
# which is why paths below concatenate it without an extra '\')
$Env:ROCM_PATH = $Env:HIP_PATH
$Env:GFX_ARCH = $Env:GPU_ARCHS
$Env:AMDGPU_TARGETS = $Env:GPU_ARCHS
$Env:PYTORCH_ROCM_ARCH = $Env:GPU_ARCHS
$Env:TF_ROCM_AMDGPU_TARGETS = $Env:GPU_ARCHS
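
# Sanity guard (a minimal sketch, not in the original gist): ROCM_PATH above is
# derived from HIP_PATH, so warn early if the ROCm installer hasn't set it.
if ([string]::IsNullOrEmpty($Env:HIP_PATH)) {
    Write-Warning "HIP_PATH is not set - ROCM_PATH and the PATH entries below will be incomplete."
}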
# llama.cpp-related variables (tweak if necessary)
$Env:LLAMA_CPP_PATH = "$Env:USERPROFILE\.llama.cpp"
$Env:LLAMA_CPP_PYTHON_VENV_PATH = "$Env:USERPROFILE\.llama.cpp.venv"
$Env:LLAMA_CPP_SERVER_HOST = "steelph0enix.pc"
$Env:LLAMA_CPP_SERVER_PORT = "51536"
$Env:LLAMA_CPP_SERVER_URL = "http://${Env:LLAMA_CPP_SERVER_HOST}:${Env:LLAMA_CPP_SERVER_PORT}/"
$Env:LLAMA_CPP_SERVER_CTX_SIZE = "20480"
$Env:LLAMA_CPP_SERVER_BATCH_SIZE = 4096
$Env:LLAMA_CPP_SERVER_UBATCH_SIZE = 1024
$Env:LLAMA_CPP_SERVER_GPU_LAYERS = 999
$Env:LLAMA_CPP_SERVER_PRIORITY = 1
$Env:LLAMA_CPP_SERVER_FRONTEND_PATH = "D:\LLMs\llama_server_frontend"
$Env:OPENAI_API_KEY = "no-key-required"
$Env:PATH = "${Env:PATH};${Env:ROCM_PATH}bin;${Env:LLAMA_CPP_PATH}\build\bin"
$Env:PYTHONPATH = "${Env:LLAMA_CPP_PATH}\gguf-py;${Env:PYTHONPATH}"
$Env:CMAKE_MODULE_PATH = "${Env:CMAKE_MODULE_PATH};${Env:ROCM_PATH}cmake"
# System-related variables
$logical_cores_amount = (Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors
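
# Optional sanity check (an added sketch): verify the llama.cpp binaries are
# reachable after the PATH update above; assumes the default build layout
# ($Env:LLAMA_CPP_PATH\build\bin) used elsewhere in this file.
if (-not (Get-Command llama-server -ErrorAction SilentlyContinue)) {
    Write-Warning "llama-server not found on PATH - build llama.cpp first (see the llm-llama-* functions below)."
}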
# llama.cpp python venv stuff
Function llm-llama-venv-activate {
    # Quoted so the dot-source also works if the venv path contains spaces
    . "$Env:LLAMA_CPP_PYTHON_VENV_PATH\Scripts\Activate.ps1"
    Write-Host "llama.cpp python virtual environment activated!"
}
Function llm-llama-venv-update {
    Write-Host "Updating llama.cpp python virtualenv..."
    llm-llama-venv-activate
    python -m pip install --upgrade pip setuptools wheel
    python -m pip install --upgrade torch sentencepiece transformers protobuf
}
Function llm-llama-venv-initialize {
    Write-Host "Initializing python virtualenv for llama.cpp..."
    python -m venv $Env:LLAMA_CPP_PYTHON_VENV_PATH
    llm-llama-venv-update
}
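
# Convenience wrapper (a sketch, not in the original gist): create the venv on
# first use, just activate it afterwards.
Function llm-llama-venv-ensure {
    if (Test-Path "$Env:LLAMA_CPP_PYTHON_VENV_PATH\Scripts\Activate.ps1") {
        llm-llama-venv-activate
    } else {
        llm-llama-venv-initialize
    }
}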
# generic llm-related functions
Function llm-server($model_path) {
    Write-Host "Running $model_path using llama-server @ $Env:LLAMA_CPP_SERVER_URL w/ $logical_cores_amount CPU cores, $Env:LLAMA_CPP_SERVER_GPU_LAYERS GPU layers, and $Env:LLAMA_CPP_SERVER_CTX_SIZE context size"
    llama-server `
        --threads $logical_cores_amount `
        --prio $Env:LLAMA_CPP_SERVER_PRIORITY `
        --prio-batch $Env:LLAMA_CPP_SERVER_PRIORITY `
        --mlock `
        --gpu-layers $Env:LLAMA_CPP_SERVER_GPU_LAYERS `
        --ctx-size $Env:LLAMA_CPP_SERVER_CTX_SIZE `
        --batch-size $Env:LLAMA_CPP_SERVER_BATCH_SIZE `
        --ubatch-size $Env:LLAMA_CPP_SERVER_UBATCH_SIZE `
        --host $Env:LLAMA_CPP_SERVER_HOST `
        --port $Env:LLAMA_CPP_SERVER_PORT `
        --flash-attn `
        --mirostat 2 `
        --model $model_path `
        --path $Env:LLAMA_CPP_SERVER_FRONTEND_PATH
}
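
# Usage (hypothetical model path): llm-server "D:\LLMs\some-model.Q8_0.gguf"
# Quick reachability probe (an added sketch): recent llama-server builds expose
# a GET /health endpoint; older builds may not, so treat the path as an assumption.
Function llm-server-health {
    try {
        Invoke-RestMethod -Uri "${Env:LLAMA_CPP_SERVER_URL}health"
    } catch {
        Write-Warning "llama-server is not reachable at $Env:LLAMA_CPP_SERVER_URL"
    }
}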
Function llm-quantize-model {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$false)]
        [string]$quantization = 'auto'
    )
    $output_file = Split-Path -Path $model_path -Leaf
    Write-Host "Converting model from HF format to GGUF..."
    # Direct invocation instead of Invoke-Expression - safer with paths containing spaces
    python "$Env:LLAMA_CPP_PATH/convert_hf_to_gguf.py" --outtype $quantization --outfile "$output_file.$quantization.gguf" $model_path
}
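
# Example (hypothetical paths): llm-quantize-model -model_path "D:\LLMs\Meta-Llama-3-8B" -quantization q8_0
# Note: convert_hf_to_gguf.py's --outtype only covers a few types (e.g. f32, f16,
# bf16, q8_0, auto); heavier quants are produced from a GGUF with the separate
# llama-quantize binary. A minimal follow-up sketch, assuming llama-quantize from
# the build is on PATH:
Function llm-requantize($gguf_path, $quant = 'Q4_K_M') {
    $output_path = $gguf_path -replace '\.gguf$', ".$quant.gguf"
    llama-quantize $gguf_path $output_path $quant
}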
# llama.cpp management functions
Function llm-llama-clone {
    Write-Host "Pulling llama.cpp repository to $Env:LLAMA_CPP_PATH"
    git clone git@github.com:ggerganov/llama.cpp.git $Env:LLAMA_CPP_PATH
    Push-Location $Env:LLAMA_CPP_PATH
    git submodule update --init --recursive
    git lfs pull
    Pop-Location
}
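
# Build sketch (not in the original gist): configure and build llama.cpp with the
# ROCm/HIP backend using the variables defined above. The exact CMake flag names
# vary between llama.cpp versions (older trees use -DLLAMA_HIPBLAS=ON, newer ones
# -DGGML_HIP=ON) - verify against your checkout.
Function llm-llama-build {
    Push-Location $Env:LLAMA_CPP_PATH
    cmake -B build -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$Env:AMDGPU_TARGETS -DCMAKE_BUILD_TYPE=Release
    cmake --build build --config Release --parallel $logical_cores_amount
    Pop-Location
}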