fix(admin): eliminate GoTrue empty-body 500s under bulk load (retry-all + undici keep-alive + sequential bulk), CSV formula-injection guard

This commit is contained in:
Gerhard Scheikl
2026-05-31 17:30:04 +02:00
parent cbd29445bb
commit 8e8df7ae64
12 changed files with 134 additions and 28 deletions
+7 -4
View File
@@ -1,6 +1,7 @@
import Link from 'next/link'; import Link from 'next/link';
import { computeMetrics } from '@/lib/admin/metrics'; import { computeMetrics } from '@/lib/admin/metrics';
import { getSupabaseAdmin } from '@/lib/supabase/admin'; import { getSupabaseAdmin } from '@/lib/supabase/admin';
import { withAdminRetry } from '@/lib/admin/retry';
import { formatBytes, formatDate } from '@/lib/format'; import { formatBytes, formatDate } from '@/lib/format';
export const dynamic = 'force-dynamic'; export const dynamic = 'force-dynamic';
@@ -18,10 +19,12 @@ export default async function AdminOverviewPage() {
const admin = getSupabaseAdmin(); const admin = getSupabaseAdmin();
// Recent signups (latest 5 users). // Recent signups (latest 5 users).
const { data: recentUsersData } = await admin.auth.admin.listUsers({ const { data: recentUsersData } = await withAdminRetry(() =>
page: 1, admin.auth.admin.listUsers({
perPage: 5, page: 1,
}); perPage: 5,
}),
);
const recentUsers = recentUsersData?.users ?? []; const recentUsers = recentUsersData?.users ?? [];
// Over-quota tunnels (compute in memory). // Over-quota tunnels (compute in memory).
+5
View File
@@ -237,6 +237,11 @@ export function TunnelsTable({
} catch { } catch {
result.fail++; result.fail++;
} }
// Tiny inter-op pacing: strictly sequential (concurrency 1) with a small
// gap so we never burst the admin API; single refresh after all settle.
if (i < targets.length - 1) {
await new Promise((r) => setTimeout(r, 75));
}
} }
setBulkProgress(null); setBulkProgress(null);
setBulkBusy(false); setBulkBusy(false);
+6
View File
@@ -136,6 +136,12 @@ export function UsersTable({
} catch { } catch {
result.fail++; result.fail++;
} }
// Tiny inter-op pacing: keep the per-row mutations strictly sequential
// (concurrency 1) with a small gap so we never burst the GoTrue admin
// API, then refresh ONCE after every op has settled (below).
if (i < targets.length - 1) {
await new Promise((r) => setTimeout(r, 75));
}
} }
setBulkProgress(null); setBulkProgress(null);
setBulkBusy(false); setBulkBusy(false);
+9 -3
View File
@@ -1,6 +1,7 @@
import { type NextRequest } from 'next/server'; import { type NextRequest } from 'next/server';
import { requireAdminApi } from '@/lib/auth/admin-guard'; import { requireAdminApi } from '@/lib/auth/admin-guard';
import { getSupabaseAdmin } from '@/lib/supabase/admin'; import { getSupabaseAdmin } from '@/lib/supabase/admin';
import { withAdminRetry } from '@/lib/admin/retry';
import { logAdminAction } from '@/lib/auth/audit'; import { logAdminAction } from '@/lib/auth/audit';
import { isUuid, parseBoolean } from '@/lib/admin/validators'; import { isUuid, parseBoolean } from '@/lib/admin/validators';
import { jsonNoStore } from '@/lib/admin/response'; import { jsonNoStore } from '@/lib/admin/response';
@@ -44,9 +45,14 @@ export async function POST(
} }
const admin = getSupabaseAdmin(); const admin = getSupabaseAdmin();
const { error } = await admin.auth.admin.updateUserById(id, { // Retry transient empty-body / poisoned-keep-alive failures so a burst-load
ban_duration: banned ? BAN_DURATION : 'none', // ban/unban doesn't 500. `updateUserById` is idempotent for our use
} as { ban_duration: string }); // (ban_duration set to the same value), so a retry is safe.
const { error } = await withAdminRetry(() =>
admin.auth.admin.updateUserById(id, {
ban_duration: banned ? BAN_DURATION : 'none',
} as { ban_duration: string }),
);
if (error) { if (error) {
console.error('admin user.ban failed', error); console.error('admin user.ban failed', error);
return jsonNoStore({ error: 'internal error' }, { status: 500 }); return jsonNoStore({ error: 'internal error' }, { status: 500 });
+14 -6
View File
@@ -1,6 +1,7 @@
import { type NextRequest } from 'next/server'; import { type NextRequest } from 'next/server';
import { requireAdminApi } from '@/lib/auth/admin-guard'; import { requireAdminApi } from '@/lib/auth/admin-guard';
import { getSupabaseAdmin } from '@/lib/supabase/admin'; import { getSupabaseAdmin } from '@/lib/supabase/admin';
import { withAdminRetry } from '@/lib/admin/retry';
import { logAdminAction } from '@/lib/auth/audit'; import { logAdminAction } from '@/lib/auth/audit';
import { isUuid } from '@/lib/admin/validators'; import { isUuid } from '@/lib/admin/validators';
import { jsonNoStore } from '@/lib/admin/response'; import { jsonNoStore } from '@/lib/admin/response';
@@ -42,17 +43,24 @@ export async function POST(
const admin = getSupabaseAdmin(); const admin = getSupabaseAdmin();
// Merge with existing app_metadata so we don't clobber other keys. // Merge with existing app_metadata so we don't clobber other keys. Retry the
const { data: existing, error: getErr } = // read on transient empty-body responses so a burst-load flake isn't
await admin.auth.admin.getUserById(id); // misreported as a 404; a genuine not-found still falls through below.
const { data: existing, error: getErr } = await withAdminRetry(() =>
admin.auth.admin.getUserById(id),
);
if (getErr || !existing.user) { if (getErr || !existing.user) {
return jsonNoStore({ error: 'user not found' }, { status: 404 }); return jsonNoStore({ error: 'user not found' }, { status: 404 });
} }
const merged = { ...(existing.user.app_metadata ?? {}), role }; const merged = { ...(existing.user.app_metadata ?? {}), role };
const { error } = await admin.auth.admin.updateUserById(id, { // `updateUserById` is idempotent here (same app_metadata), so retrying a
app_metadata: merged, // transient empty-body write is safe.
}); const { error } = await withAdminRetry(() =>
admin.auth.admin.updateUserById(id, {
app_metadata: merged,
}),
);
if (error) { if (error) {
console.error('admin user.role failed', error); console.error('admin user.role failed', error);
return jsonNoStore({ error: 'internal error' }, { status: 500 }); return jsonNoStore({ error: 'internal error' }, { status: 500 });
+17 -4
View File
@@ -119,11 +119,24 @@ export async function DELETE(
// Delete the AUTH USER first. Only if that succeeds do we remove the tunnel // Delete the AUTH USER first. Only if that succeeds do we remove the tunnel
// row, so a mid-failure never leaves an orphaned auth user with a dangling // row, so a mid-failure never leaves an orphaned auth user with a dangling
// tunnel (or half-deletes the tunnel of an already-gone user). // tunnel (or half-deletes the tunnel of an already-gone user). Retry the
const { error: delErr } = await admin.auth.admin.deleteUser(id); // delete on transient empty-body / poisoned-keep-alive responses.
const { error: delErr } = await withAdminRetry(() =>
admin.auth.admin.deleteUser(id),
);
if (delErr) { if (delErr) {
console.error('admin user.delete: deleteUser failed', delErr); // The delete may have actually succeeded with a truncated/empty response
return jsonNoStore({ error: 'internal error' }, { status: 500 }); // body (the transient failure mode). Re-check existence before reporting a
// 500: if the user is now gone, the deletion is effectively complete.
const { data: recheck, error: recheckErr } = await withAdminRetry(() =>
admin.auth.admin.getUserById(id),
);
if (recheckErr || !recheck?.user) {
// User is gone -> delete succeeded despite the truncated body.
} else {
console.error('admin user.delete: deleteUser failed', delErr);
return jsonNoStore({ error: 'internal error' }, { status: 500 });
}
} }
// The auth user is gone, so no orphaned-user state is possible. Removing the // The auth user is gone, so no orphaned-user state is possible. Removing the
+7
View File
@@ -11,6 +11,13 @@ export function csvField(v: unknown): string {
else if (typeof v === 'string') s = v; else if (typeof v === 'string') s = v;
else if (typeof v === 'object') s = JSON.stringify(v); else if (typeof v === 'object') s = JSON.stringify(v);
else s = String(v); else s = String(v);
// Spreadsheet formula-injection guard: a field whose first character is one
// of = + - @ (or a leading tab/CR) is interpreted as a formula by Excel /
// Sheets / LibreOffice. Neutralize it by prefixing a single quote BEFORE the
// RFC-4180 quote-escaping below, so the value renders as literal text.
if (s.length > 0 && /^[=+\-@\t\r]/.test(s)) {
s = `'${s}`;
}
if (/[",\r\n]/.test(s)) { if (/[",\r\n]/.test(s)) {
return `"${s.replace(/"/g, '""')}"`; return `"${s.replace(/"/g, '""')}"`;
} }
+4 -1
View File
@@ -1,4 +1,5 @@
import { getSupabaseAdmin } from '@/lib/supabase/admin'; import { getSupabaseAdmin } from '@/lib/supabase/admin';
import { withAdminRetry } from '@/lib/admin/retry';
export type AdminMetrics = { export type AdminMetrics = {
totalUsers: number; totalUsers: number;
@@ -52,7 +53,9 @@ export async function computeMetrics(): Promise<AdminMetrics> {
let signups30d = 0; let signups30d = 0;
const perPage = 1000; const perPage = 1000;
for (let page = 1; page <= 50; page++) { for (let page = 1; page <= 50; page++) {
const { data, error } = await admin.auth.admin.listUsers({ page, perPage }); const { data, error } = await withAdminRetry(() =>
admin.auth.admin.listUsers({ page, perPage }),
);
if (error) break; if (error) break;
const users = data.users; const users = data.users;
if (users.length === 0) break; if (users.length === 0) break;
+16 -6
View File
@@ -15,11 +15,20 @@
* surface as proper 4xx/5xx responses upstream. * surface as proper 4xx/5xx responses upstream.
*/ */
// Up to 3 attempts total (1 initial + 2 retries). Delays are applied BEFORE the // Up to 5 attempts total (1 initial + 4 retries). Delays are applied BEFORE
// 2nd and 3rd attempts respectively, so worst-case added latency is ~350ms — // attempts 2..5 respectively and are jittered (see `jitter`), so worst-case
// kept well under a second to keep the admin surface snappy. // added latency is ~80+150+300+600 ≈ 1.13s plus jitter — kept under ~1.5s to
const MAX_ATTEMPTS = 3; // keep the admin surface snappy while reliably riding out the empty-body /
const RETRY_DELAYS_MS = [100, 250]; // poisoned-keep-alive window that Node 24's bundled undici amplifies.
const MAX_ATTEMPTS = 5;
const RETRY_DELAYS_MS = [80, 150, 300, 600];
/** Apply ±30% random jitter so concurrent retries don't synchronise. */
function jitter(ms: number): number {
if (ms <= 0) return 0;
const delta = ms * 0.3;
return Math.round(ms - delta + Math.random() * 2 * delta);
}
/** Loose shape that both `AuthError` and `PostgrestError` satisfy. */ /** Loose shape that both `AuthError` and `PostgrestError` satisfy. */
type MaybeError = type MaybeError =
@@ -116,7 +125,8 @@ export async function withAdminRetry<R extends { error: MaybeError }>(
} }
if (attempt < attempts) { if (attempt < attempts) {
const delay = delays[attempt - 1] ?? delays[delays.length - 1] ?? 0; const base = delays[attempt - 1] ?? delays[delays.length - 1] ?? 0;
const delay = jitter(base);
if (delay > 0) await sleep(delay); if (delay > 0) await sleep(delay);
} }
} }
+36 -2
View File
@@ -1,7 +1,36 @@
import { createClient, SupabaseClient } from '@supabase/supabase-js'; import { createClient, SupabaseClient } from '@supabase/supabase-js';
import { Agent } from 'undici';
let _admin: SupabaseClient | null = null; let _admin: SupabaseClient | null = null;
/**
* Dedicated undici dispatcher for all admin-client traffic (GoTrue + PostgREST).
*
* ROOT CAUSE this addresses: GoTrue/undici intermittently returns an empty or
* truncated HTTP body when a pooled keep-alive connection is REUSED right after
* a write (bulk ban/role/delete) and then immediately read back (the auto list
* refresh). supabase-js then throws `Unexpected end of JSON input` and the
* route 500s. Node 24's bundled undici reuses connections more aggressively,
* amplifying the race.
*
* `pipelining: 0` disables request pipelining on a connection, and the bounded
* keep-alive timeouts keep idle sockets from lingering long enough to be reused
* in a half-closed state. When a request DOES still hit a poisoned socket,
* undici destroys that socket on error, so the `withAdminRetry` wrapper at the
* call sites lands on a FRESH connection rather than the same dead one. The two
* layers together eliminate the empty-body 500s.
*
* NOTE: `setGlobalDispatcher` from the npm `undici` package does NOT affect
* Node's built-in global `fetch` (it bundles its own undici), so we attach this
* dispatcher explicitly via the `dispatcher` init option, which built-in fetch
* does honour.
*/
const adminDispatcher = new Agent({
pipelining: 0,
keepAliveTimeout: 10_000,
keepAliveMaxTimeout: 10_000,
});
export function getSupabaseAdmin(): SupabaseClient { export function getSupabaseAdmin(): SupabaseClient {
if (_admin) return _admin; if (_admin) return _admin;
const url = process.env.NEXT_PUBLIC_SUPABASE_URL; const url = process.env.NEXT_PUBLIC_SUPABASE_URL;
@@ -14,9 +43,14 @@ export function getSupabaseAdmin(): SupabaseClient {
global: { global: {
// Force every request the admin client makes (GoTrue listUsers and // Force every request the admin client makes (GoTrue listUsers and
// PostgREST reads alike) to bypass Next's fetch Data Cache, so the admin // PostgREST reads alike) to bypass Next's fetch Data Cache, so the admin
// surface always reflects current state regardless of page config. // surface always reflects current state regardless of page config, and
// route it through the dedicated keep-alive-tamed dispatcher above.
fetch: (input: RequestInfo | URL, init?: RequestInit) => fetch: (input: RequestInfo | URL, init?: RequestInit) =>
fetch(input, { ...init, cache: 'no-store' }), fetch(input, {
...init,
cache: 'no-store',
dispatcher: adminDispatcher,
} as RequestInit),
}, },
}); });
return _admin; return _admin;
+11 -1
View File
@@ -12,7 +12,8 @@
"@supabase/supabase-js": "2.45.4", "@supabase/supabase-js": "2.45.4",
"next": "14.2.15", "next": "14.2.15",
"react": "18.3.1", "react": "18.3.1",
"react-dom": "18.3.1" "react-dom": "18.3.1",
"undici": "6.21.1"
}, },
"devDependencies": { "devDependencies": {
"@types/node": "20.16.10", "@types/node": "20.16.10",
@@ -608,6 +609,15 @@
"node": ">=14.17" "node": ">=14.17"
} }
}, },
"node_modules/undici": {
"version": "6.21.1",
"resolved": "https://registry.npmjs.org/undici/-/undici-6.21.1.tgz",
"integrity": "sha512-q/1rj5D0/zayJB2FraXdaWxbhWiNKDvu8naDT2dl1yTlvJp4BLtOcp2a5BvgGNQpYYJzau7tf1WgKv3b+7mqpQ==",
"license": "MIT",
"engines": {
"node": ">=18.17"
}
},
"node_modules/undici-types": { "node_modules/undici-types": {
"version": "6.19.8", "version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
+2 -1
View File
@@ -15,7 +15,8 @@
"@supabase/supabase-js": "2.45.4", "@supabase/supabase-js": "2.45.4",
"next": "14.2.15", "next": "14.2.15",
"react": "18.3.1", "react": "18.3.1",
"react-dom": "18.3.1" "react-dom": "18.3.1",
"undici": "6.21.1"
}, },
"devDependencies": { "devDependencies": {
"@types/node": "20.16.10", "@types/node": "20.16.10",