Last active
September 9, 2024 12:20
-
-
Save filiptibell/c5b1a3651234649c13a4b67f18562eff to your computer and use it in GitHub Desktop.
Push safetensors Ollama model with all quantizations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash

# This script will create and push all the different quantizations of a
# Modelfile in the current directory, tagged using the following format:
#
# - <model_name>:<model_size>-<quantization>
#
# This is the standard format used by Ollama for pushing models.
# Example using the Llama 2 7b model:
#
# `push-ollama-model.sh username llama2 7b`
#
# This will create and push tags such as the following:
#
# - username/llama2:7b
# - username/llama2:7b-fp16
# - username/llama2:7b-q8_0
# - username/llama2:7b-q6_K
# - username/llama2:7b-q4_K_S
# - etc ...
# ------------- #
# CONFIGURATION #
# ------------- #

# These are all of the quantizations currently supported by Ollama at
# the time of writing; remove entries from the list to disable them.
# https://github.com/ollama/ollama/blob/main/docs/import.md#quantizing-a-model
readonly QUANTIZATIONS=(
  # Empty string = no quantization (pushed with an "-fp16" tag)
  ""
  # Basic quantizations
  "q8_0"
  "q5_1"
  "q5_0"
  "q4_1"
  "q4_0"
  # K-means quantizations
  "q6_K"
  "q5_K_M"
  "q5_K_S"
  "q4_K_M"
  "q4_K_S"
  "q3_K_L"
  "q3_K_M"
  "q3_K_S"
  "q2_K"
)

# This quantization is also pushed as the bare "<size>" and "latest" tags
readonly DEFAULT_QUANTIZATION="q4_0"
# ----------- #
# MAIN SCRIPT #
# ----------- #

# STEP 1: Make sure we got all the input arguments
USER_NAME="$1"
MODEL_NAME="$2"
MODEL_SIZE="$3"
if [ -z "$USER_NAME" ] || [ -z "$MODEL_NAME" ] || [ -z "$MODEL_SIZE" ]; then
	# Diagnostics go to stderr so they don't pollute piped output
	echo "Usage: $0 <user_name> <model_name> <model_size>" >&2
	echo "Example: $0 username llama2 7b" >&2
	exit 1
fi

# Fail fast from here on: exit on errors, unset vars, and pipeline failures.
# Deliberately enabled only after the (possibly empty) $1..$3 are read above.
set -euo pipefail

MODEL="$USER_NAME/$MODEL_NAME"
# Never put variable data in the printf format string itself
printf '\nModel: %s\n' "$MODEL"
# STEP 2: Create and upload the default tag (e.g. username/llama2:7b),
# quantized with the default quantization
TAG="$MODEL_SIZE"
printf '\nCreating and pushing with initial tag: %s\n' "$TAG"
ollama create "$MODEL:$TAG" --quantize "$DEFAULT_QUANTIZATION"
ollama push "$MODEL:$TAG"

# STEP 3: Create and upload all the different quantizations
for QUANT in "${QUANTIZATIONS[@]}"; do
	if [ -z "$QUANT" ]; then
		# Empty entry = no quantization; Ollama convention tags this as fp16
		TAG="$MODEL_SIZE-fp16"
		printf '\nCreating and pushing unquantized with tag: %s\n' "$TAG"
		ollama create "$MODEL:$TAG"
	else
		TAG="$MODEL_SIZE-$QUANT"
		printf '\nCreating and pushing quantized with tag: %s\n' "$TAG"
		ollama create "$MODEL:$TAG" --quantize "$QUANT"
	fi
	ollama push "$MODEL:$TAG"
done
# STEP 4: Push the "latest" tag last, just to make sure it exists and
# points at the default quantization
TAG="$MODEL:latest"
printf '\nCreating and pushing with tag: %s\n' "$TAG"
ollama create "$TAG" --quantize "$DEFAULT_QUANTIZATION"
ollama push "$TAG"

# STEP 5: Models should now all be available!
printf '\n🎉 Done! 🎉\n'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment