Dockerfile for running llama.cpp with Nvidia GPU support.
Install Docker and the NVIDIA Container Toolkit. Instructions for Arch Linux here.
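On Arch Linux this amounts to roughly the following (a sketch; nvidia-container-toolkit may need to be installed from the AUR depending on your setup):
sudo pacman -S docker nvidia-container-toolkit
sudo systemctl enable --now docker
Build the image: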
docker build -t llama-cpp-cuda:0.0.1 .
Create a model directory:
mkdir -p ~/models
It will be used for storing LLMs and configuration files.
Download a model that supports the new (as of June 2023) k-quant methods in llama.cpp, for example Wizard-Vicuna-13B-Uncensored.ggmlv3.q4_K_S.bin, and place it in the models directory.
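For example (the URL below is a placeholder, not a real download link; substitute the link for whichever quantized model you chose):
wget -P ~/models https://example.com/Wizard-Vicuna-13B-Uncensored.ggmlv3.q4_K_S.bin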
Edit prompt.sh to set the model path. Also set the number of CPU threads and the number of GPU layers to offload, depending on your hardware.
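The relevant settings might look roughly like this (illustrative values only; the actual variable names in prompt.sh may differ):
MODEL=/models/Wizard-Vicuna-13B-Uncensored.ggmlv3.q4_K_S.bin   # path inside the container
THREADS=12       # number of physical CPU cores
GPU_LAYERS=40    # layers to offload to VRAM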
Link or copy it to a directory on your $PATH:
ln prompt.sh ~/.local/bin
chmod +x ~/.local/bin/prompt.sh
Run it with a storyteller prompt:
prompt.sh storyteller "a mysterious forest"
Once upon a time, there was a vast and ancient forest that stretched for miles in every direction. It was said to be enchanted, with strange and wondrous creatures living within its depths. The trees were tall and gnarled...
Run it with an instruct prompt:
prompt.sh instruct "build a bicycle"
To build a bicycle, you will need the following components:
- Frame: The main body of the bike that supports the wheels and seat.
- Wheels: The large wheel in front and the smaller one in back that roll along the ground.
- Pedals: The circular rotating devices that allow...
This container was tested on the following hardware:
- AMD Ryzen 9 3900XT 12-Core
- 1x Nvidia GTX 1080 Ti 11GB
Performance is approximately doubled with GPU offloading.
The output from llama.cpp should look like this:
main: build = 710 (b24c304)
main: seed = 1687136441
ggml_init_cublas: found 1 CUDA devices:
Device 0: NVIDIA GeForce GTX 1080 Ti
llama.cpp: loading model from /models/Wizard-Vicuna-13B-Uncensored.ggmlv3.q4_K_S.bin
...
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size = 0.09 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required = 2135.98 MB (+ 1608.00 MB per state)
llama_model_load_internal: allocating batch_size x 1 MB = 512 MB VRAM for the scratch buffer
llama_model_load_internal: offloading 40 repeating layers to GPU
llama_model_load_internal: offloading non-repeating layers to GPU
llama_model_load_internal: offloading v cache to GPU
llama_model_load_internal: offloaded 42/43 layers to GPU
llama_model_load_internal: total VRAM used: 8212 MB