diff --git a/app/admin/page.tsx b/app/admin/page.tsx index 6407460..a0bed26 100644 --- a/app/admin/page.tsx +++ b/app/admin/page.tsx @@ -1,6 +1,7 @@ import Link from 'next/link'; import { computeMetrics } from '@/lib/admin/metrics'; import { getSupabaseAdmin } from '@/lib/supabase/admin'; +import { withAdminRetry } from '@/lib/admin/retry'; import { formatBytes, formatDate } from '@/lib/format'; export const dynamic = 'force-dynamic'; @@ -18,10 +19,12 @@ export default async function AdminOverviewPage() { const admin = getSupabaseAdmin(); // Recent signups (latest 5 users). - const { data: recentUsersData } = await admin.auth.admin.listUsers({ - page: 1, - perPage: 5, - }); + const { data: recentUsersData } = await withAdminRetry(() => + admin.auth.admin.listUsers({ + page: 1, + perPage: 5, + }), + ); const recentUsers = recentUsersData?.users ?? []; // Over-quota tunnels (compute in memory). diff --git a/app/admin/tunnels/tunnels-table.tsx b/app/admin/tunnels/tunnels-table.tsx index e86bc77..c5c22d6 100644 --- a/app/admin/tunnels/tunnels-table.tsx +++ b/app/admin/tunnels/tunnels-table.tsx @@ -237,6 +237,11 @@ export function TunnelsTable({ } catch { result.fail++; } + // Tiny inter-op pacing: strictly sequential (concurrency 1) with a small + // gap so we never burst the admin API; single refresh after all settle. + if (i < targets.length - 1) { + await new Promise((r) => setTimeout(r, 75)); + } } setBulkProgress(null); setBulkBusy(false); diff --git a/app/admin/users/users-table.tsx b/app/admin/users/users-table.tsx index c898162..4e524e6 100644 --- a/app/admin/users/users-table.tsx +++ b/app/admin/users/users-table.tsx @@ -136,6 +136,12 @@ export function UsersTable({ } catch { result.fail++; } + // Tiny inter-op pacing: keep the per-row mutations strictly sequential + // (concurrency 1) with a small gap so we never burst the GoTrue admin + // API, then refresh ONCE after every op has settled (below). + if (i < targets.length - 1) { + await new Promise((r) => setTimeout(r, 75)); + } } setBulkProgress(null); setBulkBusy(false); diff --git a/app/api/admin/users/[id]/ban/route.ts b/app/api/admin/users/[id]/ban/route.ts index 1a943f5..8660b67 100644 --- a/app/api/admin/users/[id]/ban/route.ts +++ b/app/api/admin/users/[id]/ban/route.ts @@ -1,6 +1,7 @@ import { type NextRequest } from 'next/server'; import { requireAdminApi } from '@/lib/auth/admin-guard'; import { getSupabaseAdmin } from '@/lib/supabase/admin'; +import { withAdminRetry } from '@/lib/admin/retry'; import { logAdminAction } from '@/lib/auth/audit'; import { isUuid, parseBoolean } from '@/lib/admin/validators'; import { jsonNoStore } from '@/lib/admin/response'; @@ -44,9 +45,14 @@ export async function POST( } const admin = getSupabaseAdmin(); - const { error } = await admin.auth.admin.updateUserById(id, { - ban_duration: banned ? BAN_DURATION : 'none', - } as { ban_duration: string }); + // Retry transient empty-body / poisoned-keep-alive failures so a burst-load + // ban/unban doesn't 500. `updateUserById` is idempotent for our use + // (ban_duration set to the same value), so a retry is safe. + const { error } = await withAdminRetry(() => + admin.auth.admin.updateUserById(id, { + ban_duration: banned ? BAN_DURATION : 'none', + } as { ban_duration: string }), + ); if (error) { console.error('admin user.ban failed', error); return jsonNoStore({ error: 'internal error' }, { status: 500 }); diff --git a/app/api/admin/users/[id]/role/route.ts b/app/api/admin/users/[id]/role/route.ts index e1b478f..55931c5 100644 --- a/app/api/admin/users/[id]/role/route.ts +++ b/app/api/admin/users/[id]/role/route.ts @@ -1,6 +1,7 @@ import { type NextRequest } from 'next/server'; import { requireAdminApi } from '@/lib/auth/admin-guard'; import { getSupabaseAdmin } from '@/lib/supabase/admin'; +import { withAdminRetry } from '@/lib/admin/retry'; import { logAdminAction } from '@/lib/auth/audit'; import { isUuid } from '@/lib/admin/validators'; import { jsonNoStore } from '@/lib/admin/response'; @@ -42,17 +43,24 @@ export async function POST( const admin = getSupabaseAdmin(); - // Merge with existing app_metadata so we don't clobber other keys. - const { data: existing, error: getErr } = - await admin.auth.admin.getUserById(id); + // Merge with existing app_metadata so we don't clobber other keys. Retry the + // read on transient empty-body responses so a burst-load flake isn't + // misreported as a 404; a genuine not-found still falls through below. + const { data: existing, error: getErr } = await withAdminRetry(() => + admin.auth.admin.getUserById(id), + ); if (getErr || !existing.user) { return jsonNoStore({ error: 'user not found' }, { status: 404 }); } const merged = { ...(existing.user.app_metadata ?? {}), role }; - const { error } = await admin.auth.admin.updateUserById(id, { - app_metadata: merged, - }); + // `updateUserById` is idempotent here (same app_metadata), so retrying a + // transient empty-body write is safe. + const { error } = await withAdminRetry(() => + admin.auth.admin.updateUserById(id, { + app_metadata: merged, + }), + ); if (error) { console.error('admin user.role failed', error); return jsonNoStore({ error: 'internal error' }, { status: 500 }); diff --git a/app/api/admin/users/[id]/route.ts b/app/api/admin/users/[id]/route.ts index 5a2ea58..c90d361 100644 --- a/app/api/admin/users/[id]/route.ts +++ b/app/api/admin/users/[id]/route.ts @@ -119,11 +119,24 @@ export async function DELETE( // Delete the AUTH USER first. Only if that succeeds do we remove the tunnel // row, so a mid-failure never leaves an orphaned auth user with a dangling - // tunnel (or half-deletes the tunnel of an already-gone user). - const { error: delErr } = await admin.auth.admin.deleteUser(id); + // tunnel (or half-deletes the tunnel of an already-gone user). Retry the + // delete on transient empty-body / poisoned-keep-alive responses. + const { error: delErr } = await withAdminRetry(() => + admin.auth.admin.deleteUser(id), + ); if (delErr) { - console.error('admin user.delete: deleteUser failed', delErr); - return jsonNoStore({ error: 'internal error' }, { status: 500 }); + // The delete may have actually succeeded with a truncated/empty response + // body (the transient failure mode). Re-check existence before reporting a + // 500: if the user is now gone, the deletion is effectively complete. + const { data: recheck, error: recheckErr } = await withAdminRetry(() => + admin.auth.admin.getUserById(id), + ); + if (recheckErr || !recheck?.user) { + // User is gone -> delete succeeded despite the truncated body. + } else { + console.error('admin user.delete: deleteUser failed', delErr); + return jsonNoStore({ error: 'internal error' }, { status: 500 }); + } } // The auth user is gone, so no orphaned-user state is possible. Removing the diff --git a/lib/admin/csv.ts b/lib/admin/csv.ts index 3415bb6..8ef1c9f 100644 --- a/lib/admin/csv.ts +++ b/lib/admin/csv.ts @@ -11,6 +11,13 @@ export function csvField(v: unknown): string { else if (typeof v === 'string') s = v; else if (typeof v === 'object') s = JSON.stringify(v); else s = String(v); + // Spreadsheet formula-injection guard: a field whose first character is one + // of = + - @ (or a leading tab/CR) is interpreted as a formula by Excel / + // Sheets / LibreOffice. Neutralize it by prefixing a single quote BEFORE the + // RFC-4180 quote-escaping below, so the value renders as literal text. + if (s.length > 0 && /^[=+\-@\t\r]/.test(s)) { + s = `'${s}`; + } if (/[",\r\n]/.test(s)) { return `"${s.replace(/"/g, '""')}"`; } diff --git a/lib/admin/metrics.ts b/lib/admin/metrics.ts index 9d182a3..c929a46 100644 --- a/lib/admin/metrics.ts +++ b/lib/admin/metrics.ts @@ -1,4 +1,5 @@ import { getSupabaseAdmin } from '@/lib/supabase/admin'; +import { withAdminRetry } from '@/lib/admin/retry'; export type AdminMetrics = { totalUsers: number; @@ -52,7 +53,9 @@ export async function computeMetrics(): Promise { let signups30d = 0; const perPage = 1000; for (let page = 1; page <= 50; page++) { - const { data, error } = await admin.auth.admin.listUsers({ page, perPage }); + const { data, error } = await withAdminRetry(() => + admin.auth.admin.listUsers({ page, perPage }), + ); if (error) break; const users = data.users; if (users.length === 0) break; diff --git a/lib/admin/retry.ts b/lib/admin/retry.ts index 9c75f60..07f4308 100644 --- a/lib/admin/retry.ts +++ b/lib/admin/retry.ts @@ -15,11 +15,20 @@ * surface as proper 4xx/5xx responses upstream. */ -// Up to 3 attempts total (1 initial + 2 retries). Delays are applied BEFORE the -// 2nd and 3rd attempts respectively, so worst-case added latency is ~350ms — -// kept well under a second to keep the admin surface snappy. -const MAX_ATTEMPTS = 3; -const RETRY_DELAYS_MS = [100, 250]; +// Up to 5 attempts total (1 initial + 4 retries). Delays are applied BEFORE +// attempts 2..5 respectively and are jittered (see `jitter`), so worst-case +// added latency is ~80+150+300+600 ≈ 1.13s plus jitter — kept under ~1.5s to +// keep the admin surface snappy while reliably riding out the empty-body / +// poisoned-keep-alive window that Node 24's bundled undici amplifies. +const MAX_ATTEMPTS = 5; +const RETRY_DELAYS_MS = [80, 150, 300, 600]; + +/** Apply ±30% random jitter so concurrent retries don't synchronise. */ +function jitter(ms: number): number { + if (ms <= 0) return 0; + const delta = ms * 0.3; + return Math.round(ms - delta + Math.random() * 2 * delta); +} /** Loose shape that both `AuthError` and `PostgrestError` satisfy. */ type MaybeError = @@ -116,7 +125,8 @@ export async function withAdminRetry( } if (attempt < attempts) { - const delay = delays[attempt - 1] ?? delays[delays.length - 1] ?? 0; + const base = delays[attempt - 1] ?? delays[delays.length - 1] ?? 0; + const delay = jitter(base); if (delay > 0) await sleep(delay); } } diff --git a/lib/supabase/admin.ts b/lib/supabase/admin.ts index 973cd8e..e902d3c 100644 --- a/lib/supabase/admin.ts +++ b/lib/supabase/admin.ts @@ -1,7 +1,36 @@ import { createClient, SupabaseClient } from '@supabase/supabase-js'; +import { Agent } from 'undici'; let _admin: SupabaseClient | null = null; +/** + * Dedicated undici dispatcher for all admin-client traffic (GoTrue + PostgREST). + * + * ROOT CAUSE this addresses: GoTrue/undici intermittently returns an empty or + * truncated HTTP body when a pooled keep-alive connection is REUSED right after + * a write (bulk ban/role/delete) and then immediately read back (the auto list + * refresh). supabase-js then throws `Unexpected end of JSON input` and the + * route 500s. Node 24's bundled undici reuses connections more aggressively, + * amplifying the race. + * + * `pipelining: 0` disables request pipelining on a connection, and the bounded + * keep-alive timeouts keep idle sockets from lingering long enough to be reused + * in a half-closed state. When a request DOES still hit a poisoned socket, + * undici destroys that socket on error, so the `withAdminRetry` wrapper at the + * call sites lands on a FRESH connection rather than the same dead one. The two + * layers together eliminate the empty-body 500s. + * + * NOTE: `setGlobalDispatcher` from the npm `undici` package does NOT affect + * Node's built-in global `fetch` (it bundles its own undici), so we attach this + * dispatcher explicitly via the `dispatcher` init option, which built-in fetch + * does honour. + */ +const adminDispatcher = new Agent({ + pipelining: 0, + keepAliveTimeout: 10_000, + keepAliveMaxTimeout: 10_000, +}); + export function getSupabaseAdmin(): SupabaseClient { if (_admin) return _admin; const url = process.env.NEXT_PUBLIC_SUPABASE_URL; @@ -14,9 +43,14 @@ export function getSupabaseAdmin(): SupabaseClient { global: { // Force every request the admin client makes (GoTrue listUsers and // PostgREST reads alike) to bypass Next's fetch Data Cache, so the admin - // surface always reflects current state regardless of page config. + // surface always reflects current state regardless of page config, and + // route it through the dedicated keep-alive-tamed dispatcher above. fetch: (input: RequestInfo | URL, init?: RequestInit) => - fetch(input, { ...init, cache: 'no-store' }), + fetch(input, { + ...init, + cache: 'no-store', + dispatcher: adminDispatcher, + } as RequestInit), }, }); return _admin; diff --git a/package-lock.json b/package-lock.json index 6f12389..44b5a6a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,8 @@ "@supabase/supabase-js": "2.45.4", "next": "14.2.15", "react": "18.3.1", - "react-dom": "18.3.1" + "react-dom": "18.3.1", + "undici": "6.21.1" }, "devDependencies": { "@types/node": "20.16.10", @@ -608,6 +609,15 @@ "node": ">=14.17" } }, + "node_modules/undici": { + "version": "6.21.1", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.1.tgz", + "integrity": "sha512-q/1rj5D0/zayJB2FraXdaWxbhWiNKDvu8naDT2dl1yTlvJp4BLtOcp2a5BvgGNQpYYJzau7tf1WgKv3b+7mqpQ==", + "license": "MIT", + "engines": { + "node": ">=18.17" + } + }, "node_modules/undici-types": { "version": "6.19.8", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", diff --git a/package.json b/package.json index 16a3dff..6358ab2 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,8 @@ "@supabase/supabase-js": "2.45.4", "next": "14.2.15", "react": "18.3.1", - "react-dom": "18.3.1" + "react-dom": "18.3.1", + "undici": "6.21.1" }, "devDependencies": { "@types/node": "20.16.10",