@hoosierEE
Created August 9, 2024 17:24
local llama in your terminal
#!/usr/bin/env python3
import openai
import sys
# inspired by https://two-wrongs.com/q
# first download llamafile (https://github.com/Mozilla-Ocho/llamafile)
# then run it in another terminal:
# sh -c ./llava-v1.5-7b-q4.llamafile
#
# make this script executable (chmod +x local_llama.py) and put it somewhere on your PATH.
#
# now you can ask LLM questions without leaving the command line:
# local_llama.py "write a python program to find the nth fibonacci number"
# local_llama.py "summarize this document for me: $(cat README.md)"
client = openai.OpenAI(
    base_url="http://localhost:8080/v1",  # "http://<Your api-server IP>:port"
    api_key="sk-no-key-required"
)
def get_prompts():
    if args := sys.argv[1:]:
        return [{"role": "user", "content": x} for x in args]
    else:
        return [{"role": "user", "content": open(0).read()}]  # read the prompt from stdin
stream = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=[
        {"role": "system",
         "content": ("You are ChatGPT, an AI assistant. Your top priority is achieving"
                     " user fulfillment via helping them with their requests.")},
        *get_prompts()
    ],
    stream=True)
# stream results chunk-by-chunk as they arrive
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")
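Besides taking prompts as arguments, the script falls back to reading a prompt from stdin (the open(0).read() branch) when no arguments are given, so it composes with pipes and redirects. A minimal usage sketch, assuming the script is saved as local_llama.py on your PATH and the llamafile server is listening on localhost:8080 (prompt.txt is a hypothetical file name):

# prompt from arguments
local_llama.py "explain what a context manager is in python"
# prompt from stdin, used when no arguments are given
echo "explain what a context manager is in python" | local_llama.py
local_llama.py < prompt.txt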