Integrating Cycles with Next.js
This guide shows how to add budget governance to a Next.js application using API routes, middleware-level budget guards, and client-side error handling.
For streaming patterns with the Vercel AI SDK, see Integrating with Vercel AI SDK. This guide covers the broader Next.js integration: route-level guards, server actions, per-tenant isolation, and shared client setup.
Prerequisites
- A running Cycles stack with a tenant, API key, and budget (Deploy the Full Stack)
- A Next.js 15+ project (App Router)
- Node.js 20+
Installation
npm install runcyclesEnvironment variables
Add to .env.local:
CYCLES_BASE_URL=http://localhost:7878
CYCLES_API_KEY=cyc_live_...
CYCLES_TENANT=acme
OPENAI_API_KEY=sk-...Shared Cycles client
Create a singleton client for use across API routes and server actions:
// lib/cycles.ts
import { CyclesClient, CyclesConfig } from "runcycles";
export const cyclesClient = new CyclesClient(CyclesConfig.fromEnv());Route-level budget guard
Use client.decide() as a preflight check in an API route before doing expensive work:
// app/api/chat/route.ts
import { NextResponse } from "next/server";
import { cyclesClient } from "@/lib/cycles";
import {
withCycles, getCyclesContext, BudgetExceededError,
} from "runcycles";
export const runtime = "nodejs";
const INPUT_PRICE = 250; // GPT-4o: $2.50/1M tokens
const OUTPUT_PRICE = 1_000; // GPT-4o: $10/1M tokens
export async function POST(req: Request) {
const { prompt } = await req.json();
const generate = withCycles(
{
client: cyclesClient,
actionKind: "llm.completion",
actionName: "gpt-4o",
estimate: () => {
const inputTokens = Math.ceil(prompt.length / 4);
return inputTokens * INPUT_PRICE + 1024 * OUTPUT_PRICE;
},
actual: (result: { usage: { prompt_tokens: number; completion_tokens: number } }) =>
result.usage.prompt_tokens * INPUT_PRICE +
result.usage.completion_tokens * OUTPUT_PRICE,
},
async () => {
const ctx = getCyclesContext();
let maxTokens = 1024;
if (ctx?.caps?.maxTokens) {
maxTokens = Math.min(maxTokens, ctx.caps.maxTokens);
}
// Your LLM call here
const response = await callLLM(prompt, maxTokens);
if (ctx) {
ctx.metrics = {
tokensInput: response.usage.prompt_tokens,
tokensOutput: response.usage.completion_tokens,
};
}
return response;
},
);
try {
const result = await generate();
return NextResponse.json({ content: result.content });
} catch (err) {
if (err instanceof BudgetExceededError) {
return NextResponse.json(
{ error: "budget_exceeded", message: "Budget exhausted." },
{ status: 402 },
);
}
throw err;
}
}Budget preflight in API routes
Next.js middleware runs in the Edge Runtime, which does not support Node.js APIs required by the runcycles client. Instead, add a preflight budget check at the start of your API route handler:
// app/api/chat/route.ts
import { NextResponse } from "next/server";
import { cyclesClient } from "@/lib/cycles";
export async function POST(req: Request) {
const tenant = req.headers.get("x-tenant-id") ?? "acme";
// Preflight: check budget before doing expensive work
const preflight = await cyclesClient.decide({
idempotency_key: crypto.randomUUID(),
subject: { tenant, app: "my-nextjs-app" },
action: { kind: "api.request", name: "/api/chat" },
estimate: { unit: "USD_MICROCENTS", amount: 1_000_000 },
});
if (preflight.isSuccess) {
const decision = preflight.getBodyAttribute("decision");
if (decision === "DENY") {
return NextResponse.json(
{ error: "budget_exceeded", message: "Insufficient budget." },
{ status: 402 },
);
}
}
// Budget allows — proceed with the LLM call
const { prompt } = await req.json();
// ... your withCycles-wrapped LLM call here ...
}Server Actions with budget governance
Guard Next.js Server Actions with withCycles:
// app/actions.ts
"use server";
import { cyclesClient } from "@/lib/cycles";
import { withCycles, BudgetExceededError } from "runcycles";
const INPUT_PRICE = 250;
const OUTPUT_PRICE = 1_000;
export async function summarize(text: string) {
const run = withCycles(
{
client: cyclesClient,
actionKind: "llm.completion",
actionName: "gpt-4o",
estimate: () => Math.ceil(text.length / 4) * INPUT_PRICE + 512 * OUTPUT_PRICE,
actual: (r: { usage: { prompt_tokens: number; completion_tokens: number } }) =>
r.usage.prompt_tokens * INPUT_PRICE + r.usage.completion_tokens * OUTPUT_PRICE,
},
async () => {
return callLLM(`Summarize: ${text}`, 512);
},
);
try {
const result = await run();
return { content: result.content };
} catch (err) {
if (err instanceof BudgetExceededError) {
return { error: "Budget exhausted. Try again later." };
}
throw err;
}
}Per-tenant isolation
Extract the tenant from request headers or auth context:
// lib/tenant.ts
import { headers } from "next/headers";
export async function getTenant(): Promise<string> {
const headerList = await headers();
return headerList.get("x-tenant-id") ?? "acme";
}Use it in API routes to scope budget per tenant:
// app/api/chat/route.ts
import { getTenant } from "@/lib/tenant";
export async function POST(req: Request) {
const tenant = await getTenant();
const generate = withCycles(
{
client: cyclesClient,
actionKind: "llm.completion",
actionName: "gpt-4o",
subject: { tenant },
estimate: () => 2_000_000,
actual: (r: any) => r.usage.prompt_tokens * 250 + r.usage.completion_tokens * 1000,
},
async () => { /* LLM call */ },
);
// ...
}Client-side error handling
Handle budget errors in React components:
// components/chat.tsx
"use client";
import { useState } from "react";
export function Chat() {
const [error, setError] = useState<string | null>(null);
async function handleSubmit(prompt: string) {
const res = await fetch("/api/chat", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ prompt }),
});
if (res.status === 402) {
setError("Your budget has been exhausted. Please contact support.");
return;
}
const data = await res.json();
// handle response...
}
if (error) return <div className="error">{error}</div>;
return <form onSubmit={(e) => { /* ... */ }}>{ /* ... */ }</form>;
}Key points
- Singleton client in
lib/cycles.ts. Share oneCyclesClientacross all routes and server actions. withCyclesfor API routes and server actions. Wraps LLM calls with automatic reserve → execute → commit.- Next.js Middleware for preflight. Use
client.decide()to check budget before requests hit your API routes. - Per-tenant with headers. Extract tenant from
x-tenant-idheader for multi-tenant budget isolation. - 402 for budget errors. Return 402 status from API routes, handle it client-side.
- Use
runtime = "nodejs". Required forAsyncLocalStoragesupport used by the Cycles client context.
Next steps
- Integrating with Vercel AI SDK — streaming patterns with Vercel AI SDK
- Handling Streaming Responses — streaming patterns in detail
- Error Handling in TypeScript — handling budget errors
- Cost Estimation Cheat Sheet — pricing reference for estimation
- Production Operations Guide — running Cycles in production