fix(pdf-compress): stream large uploads via busboy instead of arrayBuffer

req.arrayBuffer() fails with 502 on files >100MB because it tries to
buffer the entire body in memory before the route handler runs.

New approach: busboy streams the multipart body directly to a temp file
on disk — never buffers the whole request in memory. Works for any size.

Shared helper: parse-upload.ts (busboy streaming, 500MB limit, fields).
Both local (qpdf) and cloud (iLovePDF) routes refactored to use it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
AI Assistant
2026-03-13 18:07:16 +02:00
parent f5deccd8ea
commit 5a7de39f6a
5 changed files with 211 additions and 210 deletions
+18 -89
View File
@@ -1,10 +1,9 @@
import { NextRequest, NextResponse } from "next/server";
import { writeFile, readFile, unlink, mkdir, stat } from "fs/promises";
import { readFile, unlink, stat } from "fs/promises";
import { execFile } from "child_process";
import { promisify } from "util";
import { randomUUID } from "crypto";
import { join } from "path";
import { tmpdir } from "os";
import { parseMultipartUpload } from "../parse-upload";
const execFileAsync = promisify(execFile);
@@ -24,58 +23,13 @@ function qpdfArgs(input: string, output: string): string[] {
];
}
/**
 * Extract the file binary from a raw multipart/form-data buffer.
 *
 * Walks the parts delimited by `--${boundary}`, looking for the part whose
 * Content-Disposition headers contain `filename=` (i.e. a file field), then
 * returns the bytes between the blank line that ends that part's headers and
 * the boundary that opens the next part.
 *
 * @param raw      Entire request body, boundaries included.
 * @param boundary Boundary token from the Content-Type header (without the
 *                 leading `--`).
 * @returns The file's bytes, or `null` when no file part is present.
 */
function extractFileFromMultipart(
  raw: Buffer,
  boundary: string,
): Buffer | null {
  const boundaryBuf = Buffer.from(`--${boundary}`);
  const headerSep = Buffer.from("\r\n\r\n");
  const crlf = Buffer.from("\r\n");

  let searchFrom = 0;
  while (searchFrom < raw.length) {
    const partStart = raw.indexOf(boundaryBuf, searchFrom);
    if (partStart === -1) break;

    const lineEnd = raw.indexOf(crlf, partStart);
    if (lineEnd === -1) break;

    const headerEnd = raw.indexOf(headerSep, lineEnd);
    if (headerEnd === -1) break;

    const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
    if (headers.includes("filename=")) {
      const fileStart = headerEnd + 4;
      const closingMarker = Buffer.from(`\r\n--${boundary}`);
      // Find the boundary that terminates THIS part (first occurrence at or
      // after fileStart). The previous implementation used lastIndexOf, which
      // grabs the final boundary in the body — any form fields following the
      // file part were silently appended to the extracted "file" bytes.
      const fileEnd = raw.indexOf(closingMarker, fileStart);
      if (fileEnd !== -1) {
        // Handles the empty-file case too: fileEnd === fileStart yields an
        // empty Buffer instead of leaking the trailing boundary bytes.
        return raw.subarray(fileStart, fileEnd);
      }
      // Malformed body with no closing boundary: best effort, return the rest.
      return raw.subarray(fileStart);
    }
    // No filename in this part's headers — resume scanning after them.
    searchFrom = headerEnd + 4;
  }
  return null;
}
async function cleanup(dir: string) {
try {
const { readdir } = await import("fs/promises");
const { readdir, rmdir } = await import("fs/promises");
const files = await readdir(dir);
for (const f of files) {
await unlink(join(dir, f)).catch(() => {});
}
const { rmdir } = await import("fs/promises");
await rmdir(dir).catch(() => {});
} catch {
// cleanup failure is non-critical
@@ -83,51 +37,27 @@ async function cleanup(dir: string) {
}
export async function POST(req: NextRequest) {
const tmpDir = join(tmpdir(), `pdf-qpdf-${randomUUID()}`);
let tmpDir = "";
try {
await mkdir(tmpDir, { recursive: true });
// Stream upload to disk — works for any file size
const upload = await parseMultipartUpload(req);
tmpDir = upload.tmpDir;
const inputPath = join(tmpDir, "input.pdf");
const outputPath = join(tmpDir, "output.pdf");
const inputPath = upload.filePath;
const outputPath = join(upload.tmpDir, "output.pdf");
const originalSize = upload.size;
if (!req.body) {
if (originalSize < 100) {
return NextResponse.json(
{ error: "Lipsește fișierul PDF." },
{ error: "Fișierul PDF este gol sau prea mic." },
{ status: 400 },
);
}
const rawBuf = Buffer.from(await req.arrayBuffer());
const contentType = req.headers.get("content-type") || "";
const boundaryMatch = contentType.match(
/boundary=(?:"([^"]+)"|([^\s;]+))/,
);
const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
if (!boundary) {
return NextResponse.json(
{ error: "Invalid request — missing multipart boundary." },
{ status: 400 },
);
}
const pdfBuffer = extractFileFromMultipart(rawBuf, boundary);
if (!pdfBuffer || pdfBuffer.length < 100) {
return NextResponse.json(
{ error: "Fișierul PDF este gol sau nu a putut fi extras." },
{ status: 400 },
);
}
await writeFile(inputPath, pdfBuffer);
const originalSize = pdfBuffer.length;
// qpdf: lossless structural optimization — fonts and images untouched
try {
await execFileAsync("qpdf", qpdfArgs(inputPath, outputPath), {
timeout: 120_000,
timeout: 300_000, // 5 min for very large files
maxBuffer: 10 * 1024 * 1024,
});
} catch (qpdfErr) {
@@ -139,7 +69,7 @@ export async function POST(req: NextRequest) {
{ status: 501 },
);
}
// qpdf returns exit code 3 for warnings output is still valid
// qpdf exit code 3 = warnings, output is still valid
const exitCode =
qpdfErr && typeof qpdfErr === "object" && "code" in qpdfErr
? (qpdfErr as { code: number }).code
@@ -153,10 +83,8 @@ export async function POST(req: NextRequest) {
}
// Verify output exists
let outputSize = 0;
try {
const s = await stat(outputPath);
outputSize = s.size;
await stat(outputPath);
} catch {
return NextResponse.json(
{ error: "qpdf nu a produs fișier output." },
@@ -169,7 +97,8 @@ export async function POST(req: NextRequest) {
// If compression made it bigger, return original
if (compressedSize >= originalSize) {
return new NextResponse(new Uint8Array(pdfBuffer), {
const originalBuffer = await readFile(inputPath);
return new NextResponse(new Uint8Array(originalBuffer), {
status: 200,
headers: {
"Content-Type": "application/pdf",
@@ -196,6 +125,6 @@ export async function POST(req: NextRequest) {
{ status: 500 },
);
} finally {
await cleanup(tmpDir);
if (tmpDir) await cleanup(tmpDir);
}
}