Last active
September 9, 2024 12:20
-
-
Save filiptibell/c5b1a3651234649c13a4b67f18562eff to your computer and use it in GitHub Desktop.
Push safetensors Ollama model with all quantizations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash

# This script will create and push all the different quantizations of a
# Modelfile in the current directory, tagged using the following format:
#
# - <model_name>:<model_size>-<quantization>
#
# This is the standard format used by Ollama for pushing models.
# Example using the Llama 2 7b model:
#
# `push-ollama-model.sh username llama2 7b`
#
# This will create and push tags such as the following:
#
# - username/llama2:7b
# - username/llama2:7b-fp16
# - username/llama2:7b-q8_0
# - username/llama2:7b-q6_K
# - username/llama2:7b-q4_K_S
# - etc ...
# ------------- #
# CONFIGURATION #
# ------------- #

# These are all of the quantizations currently supported by Ollama at
# the time of writing; remove entries from the list to disable them.
# https://github.com/ollama/ollama/blob/main/docs/import.md#quantizing-a-model
readonly QUANTIZATIONS=(
  # Empty string = no quantization (pushed with an "-fp16" tag)
  ""
  # Basic quantizations
  "q8_0"
  "q5_1"
  "q5_0"
  "q4_1"
  "q4_0"
  # K-means quantizations
  "q6_K"
  "q5_K_M"
  "q5_K_S"
  "q4_K_M"
  "q4_K_S"
  "q3_K_L"
  "q3_K_M"
  "q3_K_S"
  "q2_K"
)

# This quantization is also pushed as the bare "<size>" and "latest" tags
readonly DEFAULT_QUANTIZATION="q4_0"
# ----------- #
# MAIN SCRIPT #
# ----------- #

# STEP 1: Make sure we got all the input arguments
USER_NAME="$1"
MODEL_NAME="$2"
MODEL_SIZE="$3"
if [ -z "$USER_NAME" ] || [ -z "$MODEL_NAME" ] || [ -z "$MODEL_SIZE" ]; then
	# Diagnostics go to stderr so they don't pollute piped output
	echo "Usage: $0 <user_name> <model_name> <model_size>" >&2
	echo "Example: $0 username llama2 7b" >&2
	exit 1
fi

# Fail fast from here on: exit on errors, unset vars, and pipeline failures.
# Deliberately enabled only after the (possibly empty) $1..$3 are read above.
set -euo pipefail

MODEL="$USER_NAME/$MODEL_NAME"
# Never put variable data in the printf format string itself
printf '\nModel: %s\n' "$MODEL"
# STEP 2: Create and upload the default tag (e.g. username/llama2:7b),
# quantized with the default quantization
TAG="$MODEL_SIZE"
printf '\nCreating and pushing with initial tag: %s\n' "$TAG"
ollama create "$MODEL:$TAG" --quantize "$DEFAULT_QUANTIZATION"
ollama push "$MODEL:$TAG"

# STEP 3: Create and upload all the different quantizations
for QUANT in "${QUANTIZATIONS[@]}"; do
	if [ -z "$QUANT" ]; then
		# Empty entry = no quantization; Ollama convention tags this as fp16
		TAG="$MODEL_SIZE-fp16"
		printf '\nCreating and pushing unquantized with tag: %s\n' "$TAG"
		ollama create "$MODEL:$TAG"
	else
		TAG="$MODEL_SIZE-$QUANT"
		printf '\nCreating and pushing quantized with tag: %s\n' "$TAG"
		ollama create "$MODEL:$TAG" --quantize "$QUANT"
	fi
	ollama push "$MODEL:$TAG"
done
# STEP 4: Push the "latest" tag last, just to make sure it exists and
# points at the default quantization
TAG="$MODEL:latest"
printf '\nCreating and pushing with tag: %s\n' "$TAG"
ollama create "$TAG" --quantize "$DEFAULT_QUANTIZATION"
ollama push "$TAG"

# STEP 5: Models should now all be available!
printf '\n🎉 Done! 🎉\n'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment