Token Cost Regression Test Setup
Today's lesson introduces per-PR token-budget checks that catch cost regressions before they reach production LLM workloads. It extends the existing usage-tracking patterns to enforce budgets early, protecting the sustainability of the Tribunal evaluation pipeline in the phase-1-eval arc.
Resources
- 20 min
- 15 min
Codebase anchors
The Tribunal code that demonstrates today's concept. Click a line to open it in GitHub or VS Code.
This is the existing cost-tracking function that the regression test will extend and monitor for per-PR changes.
203   const ip = getClientIP(request);
204   const rateLimit = await checkRateLimit(endpoint, ip);
205
206   if (!rateLimit.allowed) {
207     return {
208       allowed: false,
209       rateLimit,
210       response: rateLimitResponse(rateLimit),
211     };
212   }
213
214   return { allowed: true, rateLimit };
215 }
216
217 /**
218  * Track API usage for a user
219  */
220 export async function trackUsage(
221   userId: string,
222   endpoint: string,
223   cost: number = 1
224 ): Promise<void> {
225   try {
226     const { getFirestore, COLLECTIONS } = await import('./firestore');
227     const db = await getFirestore();
228
229     if (!db) return;
230
231     const usageRef = db.collection('usage').doc(userId);
232     const now = new Date();
233     const monthKey = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}`;
234
235     await db.runTransaction(async (transaction: any) => {
236       const doc = await transaction.get(usageRef);
237       const data = doc.exists ? doc.data() : {};
238
239       const monthlyUsage = data[monthKey] || {};
240       monthlyUsage[endpoint] = (monthlyUsage[endpoint] || 0) + cost;
241       monthlyUsage.total = (monthlyUsage.total || 0) + cost;
242
243       transaction.set(usageRef, {
This is the closest existing usage aggregation logic the new token-budget check will measure against.
220 export async function trackUsage(
221   userId: string,
222   endpoint: string,
223   cost: number = 1
224 ): Promise<void> {
225   try {
226     const { getFirestore, COLLECTIONS } = await import('./firestore');
227     const db = await getFirestore();
228
229     if (!db) return;
230
231     const usageRef = db.collection('usage').doc(userId);
232     const now = new Date();
233     const monthKey = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}`;
234
235     await db.runTransaction(async (transaction: any) => {
236       const doc = await transaction.get(usageRef);
237       const data = doc.exists ? doc.data() : {};
238
239       const monthlyUsage = data[monthKey] || {};
240       monthlyUsage[endpoint] = (monthlyUsage[endpoint] || 0) + cost;
241       monthlyUsage.total = (monthlyUsage.total || 0) + cost;
242
243       transaction.set(usageRef, {
244         ...data,
245         [monthKey]: monthlyUsage,
246         lastActivity: now,
247       }, { merge: true });
248     });
249   } catch (error) {
250     console.error('[Usage] Tracking error:', error);
251   }
252 }
253
254 /**
255  * Get user's usage stats
256  */
257 export async function getUserUsage(userId: string): Promise<{
258   monthly: Record<string, number>;
259   total: number;
260 } | null> {
Deliverable
Commit adding token-cost regression test to __tests__/lib/syllabus/cost-regression.test.ts with CI enforcement
Quiz · 2 questions
1. What is the primary goal of a token-cost regression test in a PR workflow?
2. How would you adapt the trackUsage function to support a per-PR budget threshold?