Created
December 14, 2023 21:19
-
-
Save ianschmitz/6c45e0081330bbd412074210213019e5 to your computer and use it in GitHub Desktop.
Cloudflare Workers AI proxy example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example streaming endpoint for Cloudflare Workers built on the Hono router.
// It runs Cloudflare AI's hosted llama model, but could easily be tweaked to
// consume the OpenAI SDK directly instead. You could also skip the SDK and use
// the `fetch` API yourself — you'd just need to set the appropriate HTTP
// headers and query params (API key, etc). See the second example for a
// simpler case.
// References:
// - https://developers.cloudflare.com/workers/runtime-apis/streams/
// - https://developers.cloudflare.com/workers/platform/pricing/
// - Cloudflare AI usage within a worker: https://developers.cloudflare.com/workers-ai/platform/bindings/
app.get("/stream", async (c) => {
  const aiClient = new Ai(c.env.AI);

  // The user's question arrives as the `query` search param; reject the
  // request up front when it is absent.
  const userQuestion = c.req.query("query");
  if (!userQuestion) {
    return new Response(null, {
      status: 400,
      statusText: "Must provide question param",
    });
  }

  const systemPrompt = `You are a helpful assistant.`;
  const chatMessages = [
    { role: "system", content: systemPrompt },
    { role: "user", content: userQuestion },
  ];

  // Ask for a streamed completion so tokens can be relayed as they arrive.
  const completionStream = await aiClient.run("@cf/meta/llama-2-7b-chat-int8", {
    messages: chatMessages,
    stream: true,
  });

  // Note we aren't processing anything from the response stream directly —
  // it is piped straight into a new `Response` returned to the consumer.
  // Time the worker spends piping the response is not billable under the
  // "standard" (modern) billing model :) A request like this is easily under
  // 10ms of billable CPU time (usually around 2-3ms in my experience) and
  // thus can fit in the free tier of Cloudflare Workers as long as you have
  // < 100,000 requests per day.
  const sseHeaders = {
    "content-type": "text/event-stream",
  };
  return new Response(completionStream, { headers: sseHeaders });
});
app.get("/stream2", () => {
  // Returns the upstream response as-is: status, headers, body, etc. from
  // the origin are forwarded to the caller unchanged, and the body streams
  // straight through without being buffered by the worker.
  // NOTE(review): `token` must be defined in an enclosing scope — it is not
  // visible in this file; confirm where it comes from.
  return fetch("https://someapi.com/foo", {
    headers: {
      // Bug fix: bearer credentials belong in the standard `Authorization`
      // request header — `Authentication` is not a recognized HTTP header
      // and the origin would ignore it.
      Authorization: `Bearer ${token}`,
    },
  });
});
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.