Create a working directory. I use ~/Projects/aimodels.
Then create a subdirectory for the model and switch into it:

mkdir -p ~/Projects/aimodels/huggingface.co/TheBloke
cd ~/Projects/aimodels/huggingface.co/TheBloke

Now download the model:

wget https://huggingface.co/TheBloke/Guanaco-13B-Uncensored-GGUF/resolve/main/guanaco-13b-uncensored.Q4_0.gguf
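Optionally, sanity-check the download first; Hugging Face lists the exact size and SHA256 of each file on the model's "Files and versions" page, so you can compare against those:

# Check the file size against the value shown on the model page
ls -lh guanaco-13b-uncensored.Q4_0.gguf

# Compute the SHA256 and compare it with the checksum on the model page
sha256sum guanaco-13b-uncensored.Q4_0.gguf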
Create a docker-compose.yaml:
version: '3.6'
services:
  llama-cpp-api:
    image: ghcr.io/abetlen/llama-cpp-python:latest
    restart: on-failure
    volumes:
      - '~/Projects/aimodels:/models'
    ports:
      - '8000:8000'
    environment:
      # Must match the file downloaded above
      MODEL: '/models/huggingface.co/TheBloke/guanaco-13b-uncensored.Q4_0.gguf'
      N_GQA: 1        # grouped-query attention groups; 1 is right for 7B/13B models
      USE_MLOCK: 1    # lock the model in RAM so it is not swapped out
    cap_add:
      - IPC_LOCK      # needed for mlock inside the container
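A few more knobs can go in the same environment block if you want to tune things. This is just a sketch: the names below mirror llama-cpp-python server settings uppercased as environment variables (N_CTX, N_THREADS), so verify them against the docs for your image version:

    environment:
      MODEL: '/models/huggingface.co/TheBloke/guanaco-13b-uncensored.Q4_0.gguf'
      N_CTX: 2048        # context window size in tokens
      N_THREADS: 8       # CPU threads used for inference
      USE_MLOCK: 1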
Start the server with docker-compose up. Loading a 13B model can take a while, so give it a moment.
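Before sending a real prompt, you can check that the API is ready. llama-cpp-python exposes an OpenAI-compatible API, so listing the models is a cheap smoke test (endpoint taken from the OpenAI spec):

curl -s http://localhost:8000/v1/models | jq

The server is built on FastAPI, so you should also get interactive API docs at http://localhost:8000/docs.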
Once it's up, test the API via curl:
curl -X 'POST' \
  'http://0.0.0.0:8000/v1/chat/completions' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
    "max_tokens": 512,
    "temperature": 0.7,
    "messages": [
      {
        "content": "I want you to act as my girlfriend. Your name is Jane.",
        "role": "system"
      },
      {
        "content": "hi sexy. tell me something naughty",
        "role": "user"
      }
    ]
  }' | jq
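The same endpoint can also stream tokens as they are generated. Here is a minimal variant assuming the standard OpenAI "stream": true parameter; -N stops curl from buffering, so the server-sent-event chunks print as they arrive:

# Streamed chat completion; each chunk arrives as a 'data: {...}' line
curl -N -X 'POST' \
  'http://0.0.0.0:8000/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -d '{
    "max_tokens": 512,
    "temperature": 0.7,
    "stream": true,
    "messages": [
      {"role": "user", "content": "Tell me a short story."}
    ]
  }'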