319 lines
12 KiB
TypeScript
319 lines
12 KiB
TypeScript
/**
 * SSRF-hardened `fetch` for use whenever the URL we're about to call could
 * be influenced by user input (shop settings, Shopify-supplied product
 * image URLs, DB-stored Files URLs, …).
 *
 * Defenses:
 *   - Only `https:` is allowed by default. `http:` is allowed only for
 *     localhost when `NODE_ENV !== "production"` (handy for local dev).
 *   - Hostname is DNS-resolved and every returned address is checked
 *     against private / loopback / link-local / unique-local ranges.
 *   - The connection is then forced to the resolved IP (with the original
 *     Host header preserved) to defeat DNS-rebinding.
 *   - A hard request timeout is enforced (default 5 s).
 *   - Response size is capped while reading; we abort once the limit is
 *     exceeded instead of buffering the whole body first.
 *   - Redirects are not followed — if the caller wants a redirected target
 *     they have to re-validate it explicitly.
 *
 * The helper returns the raw bytes plus the response status / content-type
 * so callers can decide what to do with them.
 */
|
|
import { lookup as dnsLookup } from "node:dns/promises";
|
|
import net from "node:net";
|
|
import { Agent as HttpAgent } from "node:http";
|
|
import { Agent as HttpsAgent } from "node:https";
|
|
import http from "node:http";
|
|
import https from "node:https";
|
|
import ipaddr from "ipaddr.js";
|
|
|
|
/** Tuning knobs accepted by `safeFetch`; every field is optional. */
export interface SafeFetchOptions {
  /** Upper bound on the response body, in bytes; reading stops once it is exceeded. */
  maxBytes?: number;

  /** Request timeout in milliseconds (5000 when omitted). */
  timeoutMs?: number;

  /** Value to send as the `Accept` request header, if any. */
  accept?: string;

  /**
   * Host allowlist. When it contains at least one entry, a host is accepted
   * only if its lowercase name is equal to an entry or ends with `.<entry>`.
   * Handy for pinning calls to known-good CDNs (e.g. `cdn.shopify.com`).
   */
  allowedHosts?: string[];
}
|
|
|
|
/** What `safeFetch` resolves with once the response body has been fully read. */
export interface SafeFetchResult {
  /** HTTP status code of the response (0 if the transport reported none). */
  status: number;
  /** Value of the `content-type` response header, or `null` when absent. */
  contentType: string | null;
  /** The response body. */
  bytes: Uint8Array;
  /** Number of body bytes that were read. */
  bytesRead: number;
}
|
|
|
|
/**
 * Error type used for failures raised by the safe-fetch helpers. `code` is a
 * short machine-readable tag (e.g. `"bad-url"`, `"blocked-address"`,
 * `"timeout"`); `message` carries the human-readable detail.
 */
export class SafeFetchError extends Error {
  constructor(
    public readonly code: string,
    message: string,
  ) {
    super(message);
    this.name = "SafeFetchError";
  }
}
|
|
|
|
/** Timeout applied when the caller does not pass `timeoutMs`: five seconds. */
const DEFAULT_TIMEOUT_MS = 5 * 1000;

/** Body-size cap applied when the caller does not pass `maxBytes`: 8 MiB. */
const DEFAULT_MAX_BYTES = 8 << 20;
|
|
|
|
/**
 * Default-deny classifier for IP literals, built on `ipaddr.js`.
 *
 * Returns `true` only when `ip` parses and lands in the library's `"unicast"`
 * range, i.e. it is not in any of the special-purpose ranges the library
 * knows about (loopback, RFC1918 private, link-local, unique-local,
 * multicast, reserved, unspecified, broadcast, carrier-grade NAT, IPv6
 * transition ranges, …). Anything unparseable is treated as unsafe.
 *
 * IPv6 forms that merely wrap an IPv4 address are unwrapped and judged by
 * the IPv4 rule, which closes bypasses that string-prefix checks miss:
 *   - `::ffff:7f00:1`  (IPv4-mapped, hex spelling of 127.0.0.1)
 *   - `::7f00:1`       (deprecated IPv4-compatible ::127.0.0.1)
 *   - `fe90::` / `fea0::` / `feb0::` (link-local is fe80::/10, not just fe80:)
 *
 * @param ip Textual IPv4 or IPv6 address.
 * @returns `true` if the address is acceptable as a connection target.
 */
function isSafePublicAddress(ip: string): boolean {
  // Single IPv4 policy, shared by the plain-v4 path and both v6 unwrap paths.
  const isPublicV4 = (v4: ipaddr.IPv4): boolean => v4.range() === "unicast";

  let parsed: ipaddr.IPv4 | ipaddr.IPv6;
  try {
    parsed = ipaddr.parse(ip);
  } catch {
    // Not a valid address at all: deny.
    return false;
  }

  if (parsed.kind() === "ipv4") {
    return isPublicV4(parsed as ipaddr.IPv4);
  }

  const v6 = parsed as ipaddr.IPv6;

  // ::ffff:a.b.c.d (including the hex spelling): judge the embedded IPv4.
  if (v6.isIPv4MappedAddress()) {
    return isPublicV4(v6.toIPv4Address());
  }

  // ::/96, the deprecated "IPv4-compatible" block: the first six hextets are
  // zero and the last two carry an IPv4 address. These are not trusted to be
  // flagged by `range()`, so rebuild the IPv4 address from the trailing
  // 32 bits and apply the v4 policy. `::` and `::1` also take this path and
  // become 0.0.0.0 / 0.0.0.1.
  const hextets = v6.parts;
  const upper96BitsZero = hextets.slice(0, 6).every((h) => h === 0);
  if (upper96BitsZero) {
    const hi = hextets[6];
    const lo = hextets[7];
    const embedded = new ipaddr.IPv4([(hi >> 8) & 0xff, hi & 0xff, (lo >> 8) & 0xff, lo & 0xff]);
    return isPublicV4(embedded);
  }

  // Any other IPv6 address must be plain global unicast; every special range
  // (link-local, unique-local, multicast, 6to4, teredo, …) is refused.
  return v6.range() === "unicast";
}
|
|
|
|
/**
 * Logical inverse of `isSafePublicAddress`: `true` for every address that
 * must not be connected to (including strings that are not valid addresses).
 */
function isPrivateAddress(ip: string): boolean {
  const publiclyRoutable = isSafePublicAddress(ip);
  return publiclyRoutable === false;
}
|
|
|
|
/**
 * Checks a hostname against an optional allowlist.
 *
 * Matching is case-insensitive. A host matches an entry when it is equal to
 * the entry or is a subdomain of it (ends with `.<entry>`), so
 * `evilcdn.example.com` does NOT match `cdn.example.com`.
 *
 * @param hostname Host to test.
 * @param allowed  Allowlist entries; `undefined` or an empty array means
 *                 "no restriction" and every host is accepted.
 * @returns `true` when the host is permitted.
 */
function hostMatchesAllowlist(hostname: string, allowed: readonly string[] | undefined): boolean {
  if (!allowed || allowed.length === 0) return true;
  const host = hostname.toLowerCase();
  return allowed.some((entry) => {
    const suffix = entry.toLowerCase();
    // An empty entry would turn the subdomain test into `host.endsWith(".")`,
    // which accepts ANY fully-qualified name written with a trailing dot
    // (e.g. `evil.example.`). Blank entries therefore never match.
    if (suffix.length === 0) return false;
    return host === suffix || host.endsWith(`.${suffix}`);
  });
}
|
|
|
|
/**
 * Turns a hostname into one concrete, vetted IP address to connect to.
 *
 * - An IP literal is checked directly and returned unchanged.
 * - A DNS name is looked up with `{ all: true }`; if ANY returned record is
 *   a private/blocked address the whole name is refused, otherwise the first
 *   record is returned.
 *
 * @param hostname DNS name or bare IP literal.
 * @returns The address to connect to and its family (4 or 6).
 * @throws {SafeFetchError} `blocked-address` when a literal or any resolved
 *         record is not publicly routable, `dns-failed` when the lookup
 *         itself errors, `dns-empty` when the lookup yields no records.
 */
async function resolveSafeAddress(hostname: string): Promise<{ address: string; family: number }> {
  // IP literal: nothing to resolve, just vet it.
  if (net.isIP(hostname) !== 0) {
    if (isPrivateAddress(hostname)) {
      throw new SafeFetchError("blocked-address", `Refusing to connect to private address ${hostname}`);
    }
    return { address: hostname, family: net.isIPv6(hostname) ? 6 : 4 };
  }

  let records: { address: string; family: number }[];
  try {
    records = await dnsLookup(hostname, { all: true });
  } catch (err) {
    throw new SafeFetchError("dns-failed", `DNS lookup failed for ${hostname}: ${(err as Error).message}`);
  }

  // One bad record poisons the whole answer, not just the entry we would use.
  const offender = records.find((rec) => isPrivateAddress(rec.address));
  if (offender) {
    throw new SafeFetchError("blocked-address", `${hostname} resolves to private address ${offender.address}`);
  }

  const [preferred] = records;
  if (!preferred) {
    throw new SafeFetchError("dns-empty", `${hostname} resolved to no addresses`);
  }
  return { address: preferred.address, family: preferred.family };
}
|
|
|
|
/**
 * Performs an SSRF-safe HTTP(S) GET and buffers the response body.
 *
 * Pipeline: parse URL → scheme check → optional host allowlist → resolve and
 * vet the target address → connect to exactly that address → read the body
 * under a size cap, all within one wall-clock deadline.
 *
 * @param rawUrl Absolute URL to fetch.
 * @param opts   See {@link SafeFetchOptions}. `timeoutMs` is a TOTAL budget
 *               covering DNS, connect, headers and body; values that are not
 *               finite positive numbers fall back to the default so the
 *               deadline can never be switched off.
 * @returns Status, content type and raw body bytes. Non-3xx error statuses
 *          (4xx/5xx) are returned, not thrown.
 * @throws {SafeFetchError} `bad-url`, `bad-scheme`, `host-not-allowed`,
 *         `blocked-address`, `dns-failed`, `dns-empty`, `timeout`,
 *         `redirect-not-allowed` (any 3xx) or `too-large`.
 * @throws {Error} Plain errors for transport failures (mirroring the
 *         standard `fetch` error model).
 */
export async function safeFetch(rawUrl: string, opts: SafeFetchOptions = {}): Promise<SafeFetchResult> {
  let url: URL;
  try {
    url = new URL(rawUrl);
  } catch {
    throw new SafeFetchError("bad-url", `Invalid URL: ${rawUrl}`);
  }

  // WHATWG `URL.hostname` keeps the square brackets around IPv6 literals
  // ("[::1]"). `net.isIP` and the socket layer want the bare form, so strip
  // them once here; otherwise an IPv6 literal is mistaken for a DNS name and
  // the "::1" comparison below can never match.
  const bareHost = url.hostname.replace(/^\[|\]$/g, "");

  // NOTE(review): this only relaxes the *scheme* check. Loopback targets are
  // still rejected further down by `resolveSafeAddress`, so plain-http
  // localhost requests do not actually succeed today — confirm whether that
  // is intended before relying on the local-dev allowance.
  const allowHttp =
    process.env.NODE_ENV !== "production" &&
    (bareHost === "localhost" || bareHost === "127.0.0.1" || bareHost === "::1");
  if (url.protocol !== "https:" && !(url.protocol === "http:" && allowHttp)) {
    throw new SafeFetchError("bad-scheme", `Refusing non-https URL: ${url.protocol}//${url.hostname}`);
  }

  if (!hostMatchesAllowlist(url.hostname, opts.allowedHosts)) {
    throw new SafeFetchError("host-not-allowed", `Host ${url.hostname} is not on the allowlist`);
  }

  // Largest delay Node timers accept; larger values are clamped with a warning.
  const MAX_TIMER_MS = 2_147_483_647;
  const requestedTimeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const timeoutMs =
    Number.isFinite(requestedTimeoutMs) && requestedTimeoutMs > 0
      ? Math.min(requestedTimeoutMs, MAX_TIMER_MS)
      : DEFAULT_TIMEOUT_MS;
  const maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES;

  // One wall-clock deadline for the whole operation. A socket idle timeout
  // (`req.setTimeout`) is not enough: a server that trickles one byte every
  // few seconds keeps resetting it and can hold the request open forever.
  const deadline = Date.now() + timeoutMs;
  const timeoutError = (): SafeFetchError =>
    new SafeFetchError("timeout", `Request to ${url.hostname} exceeded ${timeoutMs}ms`);

  // Phase 1: DNS + address vetting, raced against the deadline. The lookup
  // itself cannot be cancelled, but we stop waiting for it. `Promise.race`
  // subscribes to both promises, so a late lookup failure is not reported as
  // an unhandled rejection.
  let dnsTimer: ReturnType<typeof setTimeout> | undefined;
  let target: { address: string; family: number };
  try {
    target = await Promise.race([
      resolveSafeAddress(bareHost),
      new Promise<never>((_resolve, reject) => {
        dnsTimer = setTimeout(() => reject(timeoutError()), timeoutMs);
      }),
    ]);
  } finally {
    clearTimeout(dnsTimer);
  }
  const { address, family } = target;

  // Pin the resolved IP. The Agent gets a custom `lookup` that always answers
  // with the pre-validated address, so the TCP connect cannot be re-resolved
  // to something else (DNS-rebinding defense).
  //
  // Node 20+ enables Happy Eyeballs (`autoSelectFamily: true`) by default on
  // the http/https agents. It calls `lookup` with `{ all: true }` and expects
  // an *array* of `{ address, family }` records; answering with the 3-arg
  // form makes the connector pass `undefined` to `socket.connect()`, which
  // throws `Invalid IP address: undefined`. Both callback shapes are handled.
  type PinnedRecord = { address: string; family: number };
  type SingleCb = (err: NodeJS.ErrnoException | null, address: string, family: number) => void;
  type AllCb = (err: NodeJS.ErrnoException | null, addresses: PinnedRecord[]) => void;
  type LookupOptions = { all?: boolean; family?: number };

  const pinnedLookup = (
    _hostname: string,
    optionsOrCb: LookupOptions | SingleCb | AllCb,
    maybeCb?: SingleCb | AllCb,
  ): void => {
    const calledWithoutOptions = typeof optionsOrCb === "function";
    const options: LookupOptions = calledWithoutOptions ? {} : (optionsOrCb ?? {});
    const cb = calledWithoutOptions ? optionsOrCb : maybeCb;
    if (options.all) {
      (cb as AllCb)(null, [{ address, family }]);
    } else {
      (cb as SingleCb)(null, address, family);
    }
  };

  const isHttps = url.protocol === "https:";
  const agent = isHttps
    ? new HttpsAgent({ keepAlive: false, lookup: pinnedLookup as never })
    : new HttpAgent({ keepAlive: false, lookup: pinnedLookup as never });

  const headers: Record<string, string> = {
    // `url.host` keeps the port and, for IPv6 literals, the brackets.
    Host: url.host,
    "User-Agent": "linumiq-invoice/1.0 (+https://linumiq.com)",
  };
  if (opts.accept) headers["Accept"] = opts.accept;

  const requestOptions: http.RequestOptions = {
    method: "GET",
    host: bareHost,
    port: url.port ? parseInt(url.port, 10) : isHttps ? 443 : 80,
    path: `${url.pathname}${url.search}`,
    headers,
    agent,
    // Node's http client never follows redirects by itself; 3xx responses
    // are additionally rejected below.
  };

  // Phase 2: the HTTP exchange, bounded by whatever is left of the budget.
  return new Promise<SafeFetchResult>((resolve, reject) => {
    const lib = isHttps ? https : http;
    let settled = false;
    let deadlineTimer: ReturnType<typeof setTimeout> | undefined;

    const succeed = (result: SafeFetchResult): void => {
      if (settled) return;
      settled = true;
      clearTimeout(deadlineTimer);
      resolve(result);
    };

    // Single failure path: settle once, then tear the connection down so no
    // further bytes are pulled from the remote side. Events emitted as a
    // consequence of the teardown re-enter here and are ignored.
    const fail = (err: Error): void => {
      if (settled) return;
      settled = true;
      clearTimeout(deadlineTimer);
      reject(err);
      req.destroy();
    };

    const req = lib.request(requestOptions, (res) => {
      res.on("error", fail);

      const status = res.statusCode ?? 0;
      // Reject 3xx — the caller must explicitly re-call with the new URL.
      // The connection is dropped rather than drained: the redirect body is
      // attacker-controlled and has no size bound.
      if (status >= 300 && status < 400) {
        fail(new SafeFetchError("redirect-not-allowed", `Refusing redirect ${status} from ${rawUrl}`));
        return;
      }

      const chunks: Buffer[] = [];
      let total = 0;
      res.on("data", (chunk: Buffer) => {
        if (settled) return;
        total += chunk.length;
        if (total > maxBytes) {
          fail(new SafeFetchError("too-large", `Response exceeded ${maxBytes} bytes`));
          return;
        }
        chunks.push(chunk);
      });
      res.on("end", () => {
        const body = Buffer.concat(chunks, total);
        succeed({
          status,
          contentType: res.headers["content-type"] ?? null,
          bytes: new Uint8Array(body),
          bytesRead: total,
        });
      });
    });

    req.on("error", fail);
    deadlineTimer = setTimeout(() => fail(timeoutError()), Math.max(0, deadline - Date.now()));
    req.end();
  });
}
|
|
|
|
/**
 * Ready-made allowlist for Shopify-served assets (CDN + Files), intended for
 * `SafeFetchOptions.allowedHosts`. Allowlist matching also accepts
 * subdomains, so the `shopify.com` entry admits every `*.shopify.com` host.
 */
export const SHOPIFY_CDN_HOSTS: string[] = [
  "cdn.shopify.com",
  "shopifycdn.com",
  "shopify.com",
];
|
|
|
|
/**
 * Input-boundary check for URLs typed in by merchants (e.g. the logo URL in
 * settings), run before the value is stored.
 *
 * A URL is accepted when it parses, uses `https:`, and names its host by DNS
 * name rather than by IPv4/IPv6 literal.
 *
 * This is defence in depth only: `safeFetch` remains the runtime backstop
 * that re-validates the resolved address at fetch time.
 *
 * @param raw The URL as entered.
 * @returns `null` when the URL may be stored, otherwise a user-facing error
 *          message describing what is wrong.
 */
export function validateMerchantHttpsUrl(raw: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(raw);
  } catch {
    return "Enter a valid URL including the https:// prefix.";
  }

  const usesHttps = parsed.protocol === "https:";
  if (!usesHttps) {
    return "Logo URL must use https://.";
  }

  // IPv6 literals come back from `URL.hostname` wrapped in brackets; drop
  // them so `net.isIP` can recognise the address.
  const bareHost = parsed.hostname.replace(/^\[|\]$/g, "");
  const hostIsIpLiteral = net.isIP(bareHost) !== 0;
  return hostIsIpLiteral ? "Logo URL must point to a domain name, not an IP address." : null;
}
|