Created
April 19, 2024 19:41
-
-
Save kwindla/797b9a66dbde115638c406749c49eced to your computer and use it in GitHub Desktop.
Groq Llama-3 Time To First Byte
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import json
import time
import statistics

from groq import Groq

# Benchmark script: measures the time to first byte (TTFB) of streamed chat
# completions from a Groq-hosted model and prints summary statistics in
# milliseconds.

# Set the Groq API key and the number of inferences to run
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
NUM_INFERENCES = 10

# Create a Groq client instance
client = Groq(api_key=GROQ_API_KEY)

# Set the input prompt (replace with your own input). It is the same for every
# request, so it is built once outside the measurement loop.
input_prompt = "Tell me about Hamlet's state of mind when he said \"to be or not to be?\""

# Create a list to store the TTFB results (milliseconds)
ttfb_results = []

for _ in range(NUM_INFERENCES):
    end_time = None

    # Use a monotonic, high-resolution clock: time.time() is wall-clock time
    # and can jump if the system clock is adjusted mid-measurement.
    start_time = time.perf_counter()

    # Send the request to the Groq model
    with client.chat.completions.with_streaming_response.create(
        messages=[
            {
                "role": "user",
                "content": input_prompt,
            }
        ],
        model="llama3-70b-8192",
        stream=True,
    ) as response:
        for line in response.iter_lines():
            # Stamp the arrival of the first line *before* printing it and
            # before the response is closed, so neither console I/O nor
            # connection teardown is counted as part of the TTFB.
            end_time = time.perf_counter()
            print(line)
            break

    if end_time is None:
        # The stream produced no lines at all; fall back to the time at which
        # the (empty) response finished so the run still yields a sample.
        end_time = time.perf_counter()

    # Calculate the TTFB and add it to the results list
    ttfb = end_time - start_time
    ttfb_results.append(ttfb * 1000)  # Convert to milliseconds

# Calculate and display the statistical overview
mean_ttfb = statistics.mean(ttfb_results)
median_ttfb = statistics.median(ttfb_results)
# statistics.stdev() needs at least two samples; report 0 for a single run
# instead of raising StatisticsError.
stddev_ttfb = statistics.stdev(ttfb_results) if len(ttfb_results) > 1 else 0.0
min_ttfb = min(ttfb_results)
max_ttfb = max(ttfb_results)

print(f"Mean TTFB: {mean_ttfb:.2f} ms")
print(f"Median TTFB: {median_ttfb:.2f} ms")
print(f"Standard Deviation: {stddev_ttfb:.2f} ms")
print(f"Min TTFB: {min_ttfb:.2f} ms")
print(f"Max TTFB: {max_ttfb:.2f} ms")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment