security hardening

This commit is contained in:
Gerhard Scheikl
2026-05-31 09:35:31 +02:00
parent d7d437a871
commit 01b4734477
31 changed files with 1234 additions and 238 deletions
+174 -37
View File
@@ -1,67 +1,204 @@
import db from "../../db.server";
import { ensureWebhookCleanupScheduled } from "./cleanup.server";
/**
 * How long a `status="processing"` reservation is considered "live" before we
 * assume the worker that claimed it crashed mid-process. After this window a
 * stale reservation may be reclaimed and the work retried.
 *
 * The window is measured against the reservation row's `receivedAt`
 * timestamp.
 */
const STALE_LEASE_MS = 5 * 60 * 1000; // 5 minutes
/**
 * The columns of a dedupe row that this module reads back when a reservation
 * attempt collides with an existing row.
 */
interface ProcessedRow {
  /** Value of the delivery's `X-Shopify-Webhook-Id` header; the unique key. */
  webhookId: string;
  /** Lifecycle marker; this module writes `"processing"` and `"done"`. */
  status: string;
  /** Lease timestamp; its age decides whether a `"processing"` row is stale. */
  receivedAt: Date;
}

/**
 * Minimal shape of the Prisma client surface we use — declared inline so the
 * helper can be unit-tested with a tiny stub instead of a real database.
 */
export interface DedupeDeps {
  db: {
    processedWebhook: {
      /** Inserts the reservation row for a delivery. */
      create: (args: {
        data: { webhookId: string; topic: string; shopDomain: string; status: string };
      }) => Promise<unknown>;
      /** Loads the existing row for a webhook id, or `null` if there is none. */
      findUnique: (args: { where: { webhookId: string } }) => Promise<ProcessedRow | null>;
      /** Changes the status and/or renews the lease timestamp of a row. */
      update: (args: {
        where: { webhookId: string };
        data: { status?: string; receivedAt?: Date };
      }) => Promise<unknown>;
      /** Removes the row for a webhook id. */
      delete: (args: { where: { webhookId: string } }) => Promise<unknown>;
    };
  };
}
/**
 * A claim on a single Shopify webhook delivery. Obtained from
 * {@link reserveWebhook}. The caller MUST eventually `commit()` (work
 * succeeded — the delivery is permanently deduped) or `release()` (work
 * failed — drop the reservation so Shopify's retry re-runs the work).
 *
 * `commit`/`release` are no-ops for reservations without a webhook id (unit
 * tests / non-Shopify callers) and for the fail-open path.
 */
export interface WebhookReservation {
  /** The delivery's `X-Shopify-Webhook-Id`, or `null` when the request had none. */
  webhookId: string | null;
  /** Marks the delivery as fully processed. */
  commit: () => Promise<void>;
  /** Drops the claim so that a later delivery of the same id is processed again. */
  release: () => Promise<void>;
}
/**
 * Builds a reservation that is not backed by any dedupe row: it carries the
 * given id (possibly `null`) and its `commit`/`release` resolve immediately
 * without doing anything.
 */
function noopReservation(webhookId: string | null): WebhookReservation {
  async function commit(): Promise<void> {}
  async function release(): Promise<void> {}
  return { webhookId, commit, release };
}
/**
 * Tells whether a caught value is a unique-constraint violation, i.e. carries
 * `code === "P2002"`. The check is duck-typed so that tests can throw plain
 * objects instead of importing the real `Prisma` namespace.
 */
function isP2002(err: unknown): boolean {
  if (err == null) {
    // null / undefined have no `code` to inspect.
    return false;
  }
  const { code } = err as { code?: unknown };
  return code === "P2002";
}
/**
 * Builds the reservation handle for a delivery that has a webhook id.
 *
 * - `commit()` updates the row for `webhookId` to `status: "done"`.
 * - `release()` deletes the row for `webhookId`.
 *
 * Neither operation rejects: a database error is caught and logged with
 * `console.warn`. `shop` and `topic` are used only in those log messages.
 */
function makeReservation(
  webhookId: string,
  shop: string,
  topic: string,
  deps: DedupeDeps,
): WebhookReservation {
  async function commit(): Promise<void> {
    try {
      const where = { webhookId };
      await deps.db.processedWebhook.update({ where, data: { status: "done" } });
    } catch (commitError) {
      // The work itself already succeeded, so a failed commit only risks a
      // later duplicate, which the side-effect code is expected to tolerate.
      console.warn(`dedupe: failed to commit webhook ${webhookId} (${topic}/${shop}):`, commitError);
    }
  }

  async function release(): Promise<void> {
    try {
      const where = { webhookId };
      await deps.db.processedWebhook.delete({ where });
    } catch (releaseError) {
      console.warn(`dedupe: failed to release webhook ${webhookId} (${topic}/${shop}):`, releaseError);
    }
  }

  return { webhookId, commit, release };
}
/**
 * Reserve this Shopify webhook delivery for processing.
 *
 * Shopify retries a delivery (re-using the same `X-Shopify-Webhook-Id`) when
 * it doesn't receive a 200 within its ~5s timeout. Naively recording the id as
 * "processed" *before* doing the work meant that if the heavy background work
 * later failed (SMTP/GraphQL/PDF error), Shopify's retry was dropped as a
 * duplicate and the invoice was never sent.
 *
 * This uses a two-phase reserve/commit keyed on the webhook id, with the
 * unique `webhookId` primary key as the concurrency lock:
 *
 *  - RESERVE: insert a `status="processing"` row. A unique-constraint
 *    violation (`P2002`) means the id is already claimed; we then inspect the
 *    existing row:
 *      - `done`               → genuine duplicate → return `null` (skip).
 *      - `processing`, fresh  → another delivery is in flight → `null`.
 *      - `processing`, stale  → previous worker crashed → reclaim & retry.
 *  - COMMIT (caller, on success) → flip the row to `status="done"`.
 *  - RELEASE (caller, on failure) → delete the row so a retry reprocesses.
 *
 * Fail-open: a dedupe-table error (other than P2002) never silently drops a
 * webhook — we return a no-op reservation and let the work run.
 *
 * @param request - Incoming webhook request; only its
 *   `x-shopify-webhook-id` header is read.
 * @param shop - Shop domain; stored as `shopDomain` and used in log messages.
 * @param topic - Webhook topic; stored on the row and used in log messages.
 * @param deps - Database surface to use; defaults to the real `db` client.
 * @returns A {@link WebhookReservation} when the caller should process the
 *   delivery, or `null` when it must short-circuit (duplicate / concurrent,
 *   or the existing row could not be loaded or reclaimed).
 */
export async function reserveWebhook(
  request: Request,
  shop: string,
  topic: string,
  deps: DedupeDeps = { db },
): Promise<WebhookReservation | null> {
  // Opportunistically schedule TTL cleanup (runtime-only; never in build/CLI
  // since this is reached only while handling a live webhook request).
  ensureWebhookCleanupScheduled();

  const webhookId = request.headers.get("x-shopify-webhook-id");
  if (!webhookId) {
    // No id (unit tests / non-Shopify callers): process without dedupe.
    return noopReservation(null);
  }

  const reservation = makeReservation(webhookId, shop, topic, deps);

  try {
    await deps.db.processedWebhook.create({
      data: { webhookId, topic, shopDomain: shop, status: "processing" },
    });
    return reservation;
  } catch (err) {
    if (!isP2002(err)) {
      // Don't fail (or silently drop) a webhook on a logging-table issue.
      console.warn(`dedupe: failed to reserve webhook ${webhookId} (${topic}/${shop}):`, err);
      return noopReservation(webhookId);
    }
    // P2002: fall through and classify the row that already holds this id.
  }

  // A row already exists. Classify it.
  let existing: ProcessedRow | null = null;
  try {
    existing = await deps.db.processedWebhook.findUnique({ where: { webhookId } });
  } catch (err) {
    console.warn(`dedupe: failed to load existing webhook ${webhookId} (${topic}/${shop}):`, err);
    // Another worker owns the row and we can't classify it — be safe and skip.
    return null;
  }

  if (!existing) {
    // Raced with a release/delete between create() and findUnique(); reclaim.
    // NOTE(review): no row is re-inserted here, so this reservation is not
    // backed by a row — a concurrent delivery could also claim the id, and
    // commit() will try to update a row that does not exist. Confirm that this
    // is acceptable or re-create the row.
    return reservation;
  }

  if (existing.status === "done") {
    console.log(
      `dedupe: skipping already-processed ${topic} for ${shop} (webhookId=${webhookId})`,
    );
    return null;
  }

  // Any status other than "done" is handled as an in-progress lease.
  const age = Date.now() - new Date(existing.receivedAt).getTime();
  if (age > STALE_LEASE_MS) {
    // The worker that reserved this crashed mid-process (or left a stale row).
    // Renew the lease and retry the work.
    // NOTE(review): the update is keyed on webhookId only, so two deliveries
    // that both observe the stale row can both renew it and both process.
    try {
      await deps.db.processedWebhook.update({
        where: { webhookId },
        data: { status: "processing", receivedAt: new Date() },
      });
    } catch (err) {
      console.warn(`dedupe: failed to reclaim stale webhook ${webhookId}:`, err);
      return null;
    }
    console.log(
      `dedupe: reclaiming stale ${topic} reservation for ${shop} ` +
        `(webhookId=${webhookId}, age=${Math.round(age / 1000)}s)`,
    );
    return reservation;
  }

  // A fresh "processing" row: another delivery is actively working on it.
  // Skip this concurrent delivery. Shopify will retry; if the active worker
  // fails it releases the reservation so a later retry reprocesses.
  console.log(
    `dedupe: ${topic} for ${shop} already in-flight (webhookId=${webhookId}); ` +
      `skipping concurrent delivery`,
  );
  return null;
}