fix(pdf-compress): stream large uploads via busboy instead of arrayBuffer

req.arrayBuffer() fails with a 502 on files >100MB because it buffers
the entire request body in memory before the handler can process it.

New approach: busboy streams the multipart body directly to a temp file
on disk — never buffers the whole request in memory. Works for any size.

Shared helper: parse-upload.ts (busboy streaming, 500MB limit, fields).
Both local (qpdf) and cloud (iLovePDF) routes refactored to use it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
AI Assistant
2026-03-13 18:07:16 +02:00
parent f5deccd8ea
commit 5a7de39f6a
5 changed files with 211 additions and 210 deletions
+31
View File
@@ -11,6 +11,7 @@
"@prisma/client": "^6.19.2",
"axios": "^1.13.6",
"axios-cookiejar-support": "^6.0.5",
"busboy": "^1.6.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"docx": "^9.6.0",
@@ -35,6 +36,7 @@
},
"devDependencies": {
"@tailwindcss/postcss": "^4",
"@types/busboy": "^1.5.4",
"@types/jszip": "^3.4.0",
"@types/node": "^20",
"@types/nodemailer": "^7.0.11",
@@ -3995,6 +3997,16 @@
"tslib": "^2.4.0"
}
},
"node_modules/@types/busboy": {
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/@types/busboy/-/busboy-1.5.4.tgz",
"integrity": "sha512-kG7WrUuAKK0NoyxfQHsVE6j1m01s6kMma64E+OZenQABMQyTJop1DumUWcLwAQ2JzpefU7PDYoRDKl8uZosFjw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/estree": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
@@ -5303,6 +5315,17 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/busboy": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
"integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
"dependencies": {
"streamsearch": "^1.1.0"
},
"engines": {
"node": ">=10.16.0"
}
},
"node_modules/bytes": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
@@ -12144,6 +12167,14 @@
"stream-chain": "^2.2.5"
}
},
"node_modules/streamsearch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
"integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
"engines": {
"node": ">=10.0.0"
}
},
"node_modules/strict-event-emitter": {
"version": "0.5.1",
"resolved": "https://registry.npmjs.org/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz",
+2
View File
@@ -12,6 +12,7 @@
"@prisma/client": "^6.19.2",
"axios": "^1.13.6",
"axios-cookiejar-support": "^6.0.5",
"busboy": "^1.6.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"docx": "^9.6.0",
@@ -36,6 +37,7 @@
},
"devDependencies": {
"@tailwindcss/postcss": "^4",
"@types/busboy": "^1.5.4",
"@types/jszip": "^3.4.0",
"@types/node": "^20",
"@types/nodemailer": "^7.0.11",
+36 -121
View File
@@ -1,4 +1,7 @@
import { NextRequest, NextResponse } from "next/server";
import { readFile, unlink } from "fs/promises";
import { join } from "path";
import { parseMultipartUpload } from "../parse-upload";
/**
* iLovePDF API integration for PDF compression.
@@ -6,97 +9,24 @@ import { NextRequest, NextResponse } from "next/server";
* Workflow: auth → start → upload → process → download
* Docs: https://www.iloveapi.com/docs/api-reference
*
* Env vars: ILOVEPDF_PUBLIC_KEY, ILOVEPDF_SECRET_KEY
* Env vars: ILOVEPDF_PUBLIC_KEY
* Free tier: 250 files/month
*/
const ILOVEPDF_PUBLIC_KEY = process.env.ILOVEPDF_PUBLIC_KEY ?? "";
const API_BASE = "https://api.ilovepdf.com/v1";
/**
* Extract the file binary from a raw multipart/form-data buffer.
*/
function extractFileFromMultipart(
raw: Buffer,
boundary: string,
): { buffer: Buffer; filename: string } | null {
const boundaryBuf = Buffer.from(`--${boundary}`);
const headerSep = Buffer.from("\r\n\r\n");
const crlf = Buffer.from("\r\n");
let searchFrom = 0;
while (searchFrom < raw.length) {
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes("filename=")) {
const fileStart = headerEnd + 4;
// Extract original filename
const fnMatch = headers.match(/filename="([^"]+)"/);
const filename = fnMatch?.[1] ?? "input.pdf";
const closingMarker = Buffer.from(`\r\n--${boundary}`);
const fileEnd = raw.lastIndexOf(closingMarker);
const buffer =
fileEnd > fileStart
? raw.subarray(fileStart, fileEnd)
: raw.subarray(fileStart);
return { buffer, filename };
async function cleanup(dir: string) {
try {
const { readdir, rmdir } = await import("fs/promises");
const files = await readdir(dir);
for (const f of files) {
await unlink(join(dir, f)).catch(() => {});
}
searchFrom = headerEnd + 4;
await rmdir(dir).catch(() => {});
} catch {
// non-critical
}
return null;
}
/**
* Extract a text field value from multipart body.
*/
function extractFieldFromMultipart(
raw: Buffer,
boundary: string,
fieldName: string,
): string | null {
const boundaryBuf = Buffer.from(`--${boundary}`);
const headerSep = Buffer.from("\r\n\r\n");
const crlf = Buffer.from("\r\n");
const namePattern = `name="${fieldName}"`;
let searchFrom = 0;
while (searchFrom < raw.length) {
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes(namePattern) && !headers.includes("filename=")) {
const valueStart = headerEnd + 4;
const nextBoundary = raw.indexOf(
Buffer.from(`\r\n--${boundary}`),
valueStart,
);
if (nextBoundary > valueStart) {
return raw.subarray(valueStart, nextBoundary).toString("utf8").trim();
}
}
searchFrom = headerEnd + 4;
}
return null;
}
export async function POST(req: NextRequest) {
@@ -110,39 +40,23 @@ export async function POST(req: NextRequest) {
);
}
let tmpDir = "";
try {
// Parse multipart body
if (!req.body) {
// Stream upload to disk — works for any file size
const upload = await parseMultipartUpload(req);
tmpDir = upload.tmpDir;
const originalSize = upload.size;
if (originalSize < 100) {
return NextResponse.json(
{ error: "Lipsește fișierul PDF." },
{ error: "Fișierul PDF este gol sau prea mic." },
{ status: 400 },
);
}
const rawBuf = Buffer.from(await req.arrayBuffer());
const contentType = req.headers.get("content-type") || "";
const boundaryMatch = contentType.match(
/boundary=(?:"([^"]+)"|([^\s;]+))/,
);
const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
if (!boundary) {
return NextResponse.json(
{ error: "Invalid request." },
{ status: 400 },
);
}
const fileData = extractFileFromMultipart(rawBuf, boundary);
if (!fileData || fileData.buffer.length < 100) {
return NextResponse.json(
{ error: "Fișierul PDF nu a putut fi extras." },
{ status: 400 },
);
}
// Extract compression level (extreme / recommended / low)
const levelParam = extractFieldFromMultipart(rawBuf, boundary, "level");
// Compression level from form field
const levelParam = upload.fields["level"] ?? "";
const compressionLevel =
levelParam === "extreme"
? "extreme"
@@ -150,8 +64,6 @@ export async function POST(req: NextRequest) {
? "low"
: "recommended";
const originalSize = fileData.buffer.length;
// Step 1: Authenticate
const authRes = await fetch(`${API_BASE}/auth`, {
method: "POST",
@@ -188,20 +100,21 @@ export async function POST(req: NextRequest) {
task: string;
};
// Step 3: Upload file
// Step 3: Upload file (read from disk to avoid double-buffering)
const fileBuffer = await readFile(upload.filePath);
const uploadForm = new FormData();
uploadForm.append("task", task);
uploadForm.append(
"file",
new Blob([new Uint8Array(fileData.buffer)], { type: "application/pdf" }),
fileData.filename,
new Blob([new Uint8Array(fileBuffer)], { type: "application/pdf" }),
upload.filename,
);
const uploadRes = await fetch(`https://${server}/v1/upload`, {
method: "POST",
headers: { Authorization: `Bearer ${token}` },
body: uploadForm,
signal: AbortSignal.timeout(300_000), // 5 min for large files
signal: AbortSignal.timeout(600_000), // 10 min for very large files
});
if (!uploadRes.ok) {
@@ -230,11 +143,11 @@ export async function POST(req: NextRequest) {
files: [
{
server_filename,
filename: fileData.filename,
filename: upload.filename,
},
],
}),
signal: AbortSignal.timeout(300_000),
signal: AbortSignal.timeout(600_000),
});
if (!processRes.ok) {
@@ -250,7 +163,7 @@ export async function POST(req: NextRequest) {
`https://${server}/v1/download/${task}`,
{
headers: { Authorization: `Bearer ${token}` },
signal: AbortSignal.timeout(300_000),
signal: AbortSignal.timeout(600_000),
},
);
@@ -268,7 +181,7 @@ export async function POST(req: NextRequest) {
const resultBuffer = Buffer.from(await resultBlob.arrayBuffer());
const compressedSize = resultBuffer.length;
// Clean up task on iLovePDF
// Clean up task on iLovePDF (fire and forget)
fetch(`https://${server}/v1/task/${task}`, {
method: "DELETE",
headers: { Authorization: `Bearer ${token}` },
@@ -278,7 +191,7 @@ export async function POST(req: NextRequest) {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Disposition": `attachment; filename="${fileData.filename.replace(/\.pdf$/i, "-comprimat.pdf")}"`,
"Content-Disposition": `attachment; filename="${upload.filename.replace(/\.pdf$/i, "-comprimat.pdf")}"`,
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(compressedSize),
},
@@ -289,5 +202,7 @@ export async function POST(req: NextRequest) {
{ error: `Eroare iLovePDF: ${message}` },
{ status: 500 },
);
} finally {
if (tmpDir) await cleanup(tmpDir);
}
}
+18 -89
View File
@@ -1,10 +1,9 @@
import { NextRequest, NextResponse } from "next/server";
import { writeFile, readFile, unlink, mkdir, stat } from "fs/promises";
import { readFile, unlink, stat } from "fs/promises";
import { execFile } from "child_process";
import { promisify } from "util";
import { randomUUID } from "crypto";
import { join } from "path";
import { tmpdir } from "os";
import { parseMultipartUpload } from "../parse-upload";
const execFileAsync = promisify(execFile);
@@ -24,58 +23,13 @@ function qpdfArgs(input: string, output: string): string[] {
];
}
/**
* Extract the file binary from a raw multipart/form-data buffer.
* Finds the part whose Content-Disposition contains `filename=`,
* then returns the bytes between the header-end and the closing boundary.
*/
function extractFileFromMultipart(
raw: Buffer,
boundary: string,
): Buffer | null {
const boundaryBuf = Buffer.from(`--${boundary}`);
const headerSep = Buffer.from("\r\n\r\n");
const crlf = Buffer.from("\r\n");
let searchFrom = 0;
while (searchFrom < raw.length) {
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes("filename=")) {
const fileStart = headerEnd + 4;
const closingMarker = Buffer.from(`\r\n--${boundary}`);
const fileEnd = raw.lastIndexOf(closingMarker);
if (fileEnd > fileStart) {
return raw.subarray(fileStart, fileEnd);
}
return raw.subarray(fileStart);
}
searchFrom = headerEnd + 4;
}
return null;
}
async function cleanup(dir: string) {
try {
const { readdir } = await import("fs/promises");
const { readdir, rmdir } = await import("fs/promises");
const files = await readdir(dir);
for (const f of files) {
await unlink(join(dir, f)).catch(() => {});
}
const { rmdir } = await import("fs/promises");
await rmdir(dir).catch(() => {});
} catch {
// cleanup failure is non-critical
@@ -83,51 +37,27 @@ async function cleanup(dir: string) {
}
export async function POST(req: NextRequest) {
const tmpDir = join(tmpdir(), `pdf-qpdf-${randomUUID()}`);
let tmpDir = "";
try {
await mkdir(tmpDir, { recursive: true });
// Stream upload to disk — works for any file size
const upload = await parseMultipartUpload(req);
tmpDir = upload.tmpDir;
const inputPath = join(tmpDir, "input.pdf");
const outputPath = join(tmpDir, "output.pdf");
const inputPath = upload.filePath;
const outputPath = join(upload.tmpDir, "output.pdf");
const originalSize = upload.size;
if (!req.body) {
if (originalSize < 100) {
return NextResponse.json(
{ error: "Lipsește fișierul PDF." },
{ error: "Fișierul PDF este gol sau prea mic." },
{ status: 400 },
);
}
const rawBuf = Buffer.from(await req.arrayBuffer());
const contentType = req.headers.get("content-type") || "";
const boundaryMatch = contentType.match(
/boundary=(?:"([^"]+)"|([^\s;]+))/,
);
const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
if (!boundary) {
return NextResponse.json(
{ error: "Invalid request — missing multipart boundary." },
{ status: 400 },
);
}
const pdfBuffer = extractFileFromMultipart(rawBuf, boundary);
if (!pdfBuffer || pdfBuffer.length < 100) {
return NextResponse.json(
{ error: "Fișierul PDF este gol sau nu a putut fi extras." },
{ status: 400 },
);
}
await writeFile(inputPath, pdfBuffer);
const originalSize = pdfBuffer.length;
// qpdf: lossless structural optimization — fonts and images untouched
try {
await execFileAsync("qpdf", qpdfArgs(inputPath, outputPath), {
timeout: 120_000,
timeout: 300_000, // 5 min for very large files
maxBuffer: 10 * 1024 * 1024,
});
} catch (qpdfErr) {
@@ -139,7 +69,7 @@ export async function POST(req: NextRequest) {
{ status: 501 },
);
}
// qpdf returns exit code 3 for warnings output is still valid
// qpdf exit code 3 = warnings, output is still valid
const exitCode =
qpdfErr && typeof qpdfErr === "object" && "code" in qpdfErr
? (qpdfErr as { code: number }).code
@@ -153,10 +83,8 @@ export async function POST(req: NextRequest) {
}
// Verify output exists
let outputSize = 0;
try {
const s = await stat(outputPath);
outputSize = s.size;
await stat(outputPath);
} catch {
return NextResponse.json(
{ error: "qpdf nu a produs fișier output." },
@@ -169,7 +97,8 @@ export async function POST(req: NextRequest) {
// If compression made it bigger, return original
if (compressedSize >= originalSize) {
return new NextResponse(new Uint8Array(pdfBuffer), {
const originalBuffer = await readFile(inputPath);
return new NextResponse(new Uint8Array(originalBuffer), {
status: 200,
headers: {
"Content-Type": "application/pdf",
@@ -196,6 +125,6 @@ export async function POST(req: NextRequest) {
{ status: 500 },
);
} finally {
await cleanup(tmpDir);
if (tmpDir) await cleanup(tmpDir);
}
}
+124
View File
@@ -0,0 +1,124 @@
/**
 * Streaming multipart parser for large PDF uploads.
 *
 * Uses `busboy` to stream the file part directly to disk — never buffers the
 * entire request body in memory. Works reliably for files of any size up to
 * the configured limit.
 */
import { NextRequest } from "next/server";
import { createWriteStream } from "fs";
import { mkdir, rm } from "fs/promises";
import { randomUUID } from "crypto";
import { join, basename } from "path";
import { tmpdir } from "os";
import { Readable } from "stream";
import Busboy from "busboy";

/** Maximum accepted upload size in bytes (500 MB). */
const MAX_FILE_SIZE = 500 * 1024 * 1024;

export interface ParsedUpload {
  /** Absolute path to the extracted PDF on disk */
  filePath: string;
  /** Original filename from the upload (sanitized — basename only) */
  filename: string;
  /** File size in bytes */
  size: number;
  /** Temp directory (caller should clean up) */
  tmpDir: string;
  /** Any extra form fields (e.g. "level") */
  fields: Record<string, string>;
}

/**
 * Parse a multipart/form-data request, streaming the file to a temp directory.
 * Returns the path to the extracted file on disk + metadata.
 *
 * Rejects when: the request has no body/content-type, no file part is present,
 * the file exceeds {@link MAX_FILE_SIZE}, or any stream/filesystem error occurs.
 * On rejection the temp directory is removed best-effort, since the caller
 * never receives `tmpDir` in that case.
 */
export function parseMultipartUpload(req: NextRequest): Promise<ParsedUpload> {
  return new Promise((resolve, reject) => {
    const contentType = req.headers.get("content-type");
    if (!contentType || !req.body) {
      return reject(new Error("Lipsește fișierul PDF."));
    }

    const tmpDir = join(tmpdir(), `pdf-upload-${randomUUID()}`);
    const fields: Record<string, string> = {};
    let settled = false;
    // Tracks whether a file part was seen at all, so busboy's "close" (which
    // may fire before the write stream's "finish") doesn't falsely reject.
    let fileSeen = false;

    const fail = (err: Error) => {
      if (settled) return;
      settled = true;
      // Best-effort cleanup: on failure the caller never learns tmpDir.
      void rm(tmpDir, { recursive: true, force: true }).catch(() => {});
      reject(err);
    };
    const succeed = (upload: ParsedUpload) => {
      if (settled) return;
      settled = true;
      resolve(upload);
    };

    const bb = Busboy({
      headers: { "content-type": contentType },
      limits: {
        fileSize: MAX_FILE_SIZE,
        files: 1,
      },
    });

    bb.on("field", (name: string, val: string) => {
      fields[name] = val;
    });

    bb.on(
      "file",
      (
        _name: string,
        file: NodeJS.ReadableStream & { resume(): void },
        info: { filename: string; encoding: string; mimeType: string },
      ) => {
        fileSeen = true;
        // basename() strips any directory components the client may have
        // smuggled in (e.g. "../../evil") — prevents path traversal.
        const filename = basename(info.filename || "input.pdf") || "input.pdf";
        const filePath = join(tmpDir, filename);

        // Ensure temp dir exists, then stream file to disk. busboy applies
        // backpressure while the file stream is unconsumed, so the pending
        // mkdir does not drop data.
        mkdir(tmpDir, { recursive: true })
          .then(() => {
            let size = 0;
            const ws = createWriteStream(filePath);
            file.on("data", (chunk: Buffer) => {
              size += chunk.length;
            });
            // busboy silently truncates at the fileSize limit and emits
            // "limit" — reject instead of processing a truncated PDF.
            file.on("limit", () => {
              ws.destroy();
              fail(new Error("Fișierul depășește limita de 500 MB."));
            });
            file.on("error", (err: Error) => fail(err));
            ws.on("finish", () => {
              succeed({ filePath, filename, size, tmpDir, fields });
            });
            ws.on("error", (err: Error) => fail(err));
            file.pipe(ws);
          })
          .catch((err) => {
            // Drain the part so busboy can finish parsing the request.
            file.resume();
            fail(err instanceof Error ? err : new Error(String(err)));
          });
      },
    );

    bb.on("error", (err: Error) => fail(err));

    bb.on("close", () => {
      // Only an error when no file part was present; if one was, the write
      // stream's "finish" handler may still be about to resolve.
      if (!fileSeen) {
        fail(new Error("Lipsește fișierul PDF din upload."));
      }
    });

    // Pipe the Web ReadableStream into busboy; surface client aborts and
    // other transport errors instead of crashing with an unhandled 'error'.
    const nodeStream = Readable.fromWeb(
      req.body as import("stream/web").ReadableStream,
    );
    nodeStream.on("error", (err: Error) => fail(err));
    nodeStream.pipe(bb);
  });
}