fix(pdf-compress): stream large uploads via busboy instead of arrayBuffer

req.arrayBuffer() fails with a 502 on files >100MB because it buffers
the entire request body in memory before the handler can process it.

New approach: busboy streams the multipart body directly to a temp file
on disk — never buffers the whole request in memory. Works for any size.

Shared helper: parse-upload.ts (busboy streaming, 500MB limit, fields).
Both local (qpdf) and cloud (iLovePDF) routes refactored to use it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
AI Assistant
2026-03-13 18:07:16 +02:00
parent f5deccd8ea
commit 5a7de39f6a
5 changed files with 211 additions and 210 deletions
+31
View File
@@ -11,6 +11,7 @@
"@prisma/client": "^6.19.2",
"axios": "^1.13.6",
"axios-cookiejar-support": "^6.0.5",
"busboy": "^1.6.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"docx": "^9.6.0",
@@ -35,6 +36,7 @@
},
"devDependencies": {
"@tailwindcss/postcss": "^4",
"@types/busboy": "^1.5.4",
"@types/jszip": "^3.4.0",
"@types/node": "^20",
"@types/nodemailer": "^7.0.11",
@@ -3995,6 +3997,16 @@
"tslib": "^2.4.0"
}
},
"node_modules/@types/busboy": {
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/@types/busboy/-/busboy-1.5.4.tgz",
"integrity": "sha512-kG7WrUuAKK0NoyxfQHsVE6j1m01s6kMma64E+OZenQABMQyTJop1DumUWcLwAQ2JzpefU7PDYoRDKl8uZosFjw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/estree": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
@@ -5303,6 +5315,17 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/busboy": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
"integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
"dependencies": {
"streamsearch": "^1.1.0"
},
"engines": {
"node": ">=10.16.0"
}
},
"node_modules/bytes": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
@@ -12144,6 +12167,14 @@
"stream-chain": "^2.2.5"
}
},
"node_modules/streamsearch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
"integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
"engines": {
"node": ">=10.0.0"
}
},
"node_modules/strict-event-emitter": {
"version": "0.5.1",
"resolved": "https://registry.npmjs.org/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz",
+2
View File
@@ -12,6 +12,7 @@
"@prisma/client": "^6.19.2",
"axios": "^1.13.6",
"axios-cookiejar-support": "^6.0.5",
"busboy": "^1.6.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"docx": "^9.6.0",
@@ -36,6 +37,7 @@
},
"devDependencies": {
"@tailwindcss/postcss": "^4",
"@types/busboy": "^1.5.4",
"@types/jszip": "^3.4.0",
"@types/node": "^20",
"@types/nodemailer": "^7.0.11",
+36 -121
View File
@@ -1,4 +1,7 @@
import { NextRequest, NextResponse } from "next/server";
import { readFile, unlink } from "fs/promises";
import { join } from "path";
import { parseMultipartUpload } from "../parse-upload";
/**
* iLovePDF API integration for PDF compression.
@@ -6,97 +9,24 @@ import { NextRequest, NextResponse } from "next/server";
* Workflow: auth → start → upload → process → download
* Docs: https://www.iloveapi.com/docs/api-reference
*
* Env vars: ILOVEPDF_PUBLIC_KEY, ILOVEPDF_SECRET_KEY
* Env vars: ILOVEPDF_PUBLIC_KEY
* Free tier: 250 files/month
*/
const ILOVEPDF_PUBLIC_KEY = process.env.ILOVEPDF_PUBLIC_KEY ?? "";
const API_BASE = "https://api.ilovepdf.com/v1";
/**
* Extract the file binary from a raw multipart/form-data buffer.
*/
function extractFileFromMultipart(
raw: Buffer,
boundary: string,
): { buffer: Buffer; filename: string } | null {
const boundaryBuf = Buffer.from(`--${boundary}`);
const headerSep = Buffer.from("\r\n\r\n");
const crlf = Buffer.from("\r\n");
let searchFrom = 0;
while (searchFrom < raw.length) {
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes("filename=")) {
const fileStart = headerEnd + 4;
// Extract original filename
const fnMatch = headers.match(/filename="([^"]+)"/);
const filename = fnMatch?.[1] ?? "input.pdf";
const closingMarker = Buffer.from(`\r\n--${boundary}`);
const fileEnd = raw.lastIndexOf(closingMarker);
const buffer =
fileEnd > fileStart
? raw.subarray(fileStart, fileEnd)
: raw.subarray(fileStart);
return { buffer, filename };
async function cleanup(dir: string) {
try {
const { readdir, rmdir } = await import("fs/promises");
const files = await readdir(dir);
for (const f of files) {
await unlink(join(dir, f)).catch(() => {});
}
searchFrom = headerEnd + 4;
await rmdir(dir).catch(() => {});
} catch {
// non-critical
}
return null;
}
/**
* Extract a text field value from multipart body.
*/
function extractFieldFromMultipart(
raw: Buffer,
boundary: string,
fieldName: string,
): string | null {
const boundaryBuf = Buffer.from(`--${boundary}`);
const headerSep = Buffer.from("\r\n\r\n");
const crlf = Buffer.from("\r\n");
const namePattern = `name="${fieldName}"`;
let searchFrom = 0;
while (searchFrom < raw.length) {
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes(namePattern) && !headers.includes("filename=")) {
const valueStart = headerEnd + 4;
const nextBoundary = raw.indexOf(
Buffer.from(`\r\n--${boundary}`),
valueStart,
);
if (nextBoundary > valueStart) {
return raw.subarray(valueStart, nextBoundary).toString("utf8").trim();
}
}
searchFrom = headerEnd + 4;
}
return null;
}
export async function POST(req: NextRequest) {
@@ -110,39 +40,23 @@ export async function POST(req: NextRequest) {
);
}
let tmpDir = "";
try {
// Parse multipart body
if (!req.body) {
// Stream upload to disk — works for any file size
const upload = await parseMultipartUpload(req);
tmpDir = upload.tmpDir;
const originalSize = upload.size;
if (originalSize < 100) {
return NextResponse.json(
{ error: "Lipsește fișierul PDF." },
{ error: "Fișierul PDF este gol sau prea mic." },
{ status: 400 },
);
}
const rawBuf = Buffer.from(await req.arrayBuffer());
const contentType = req.headers.get("content-type") || "";
const boundaryMatch = contentType.match(
/boundary=(?:"([^"]+)"|([^\s;]+))/,
);
const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
if (!boundary) {
return NextResponse.json(
{ error: "Invalid request." },
{ status: 400 },
);
}
const fileData = extractFileFromMultipart(rawBuf, boundary);
if (!fileData || fileData.buffer.length < 100) {
return NextResponse.json(
{ error: "Fișierul PDF nu a putut fi extras." },
{ status: 400 },
);
}
// Extract compression level (extreme / recommended / low)
const levelParam = extractFieldFromMultipart(rawBuf, boundary, "level");
// Compression level from form field
const levelParam = upload.fields["level"] ?? "";
const compressionLevel =
levelParam === "extreme"
? "extreme"
@@ -150,8 +64,6 @@ export async function POST(req: NextRequest) {
? "low"
: "recommended";
const originalSize = fileData.buffer.length;
// Step 1: Authenticate
const authRes = await fetch(`${API_BASE}/auth`, {
method: "POST",
@@ -188,20 +100,21 @@ export async function POST(req: NextRequest) {
task: string;
};
// Step 3: Upload file
// Step 3: Upload file (read from disk to avoid double-buffering)
const fileBuffer = await readFile(upload.filePath);
const uploadForm = new FormData();
uploadForm.append("task", task);
uploadForm.append(
"file",
new Blob([new Uint8Array(fileData.buffer)], { type: "application/pdf" }),
fileData.filename,
new Blob([new Uint8Array(fileBuffer)], { type: "application/pdf" }),
upload.filename,
);
const uploadRes = await fetch(`https://${server}/v1/upload`, {
method: "POST",
headers: { Authorization: `Bearer ${token}` },
body: uploadForm,
signal: AbortSignal.timeout(300_000), // 5 min for large files
signal: AbortSignal.timeout(600_000), // 10 min for very large files
});
if (!uploadRes.ok) {
@@ -230,11 +143,11 @@ export async function POST(req: NextRequest) {
files: [
{
server_filename,
filename: fileData.filename,
filename: upload.filename,
},
],
}),
signal: AbortSignal.timeout(300_000),
signal: AbortSignal.timeout(600_000),
});
if (!processRes.ok) {
@@ -250,7 +163,7 @@ export async function POST(req: NextRequest) {
`https://${server}/v1/download/${task}`,
{
headers: { Authorization: `Bearer ${token}` },
signal: AbortSignal.timeout(300_000),
signal: AbortSignal.timeout(600_000),
},
);
@@ -268,7 +181,7 @@ export async function POST(req: NextRequest) {
const resultBuffer = Buffer.from(await resultBlob.arrayBuffer());
const compressedSize = resultBuffer.length;
// Clean up task on iLovePDF
// Clean up task on iLovePDF (fire and forget)
fetch(`https://${server}/v1/task/${task}`, {
method: "DELETE",
headers: { Authorization: `Bearer ${token}` },
@@ -278,7 +191,7 @@ export async function POST(req: NextRequest) {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Disposition": `attachment; filename="${fileData.filename.replace(/\.pdf$/i, "-comprimat.pdf")}"`,
"Content-Disposition": `attachment; filename="${upload.filename.replace(/\.pdf$/i, "-comprimat.pdf")}"`,
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(compressedSize),
},
@@ -289,5 +202,7 @@ export async function POST(req: NextRequest) {
{ error: `Eroare iLovePDF: ${message}` },
{ status: 500 },
);
} finally {
if (tmpDir) await cleanup(tmpDir);
}
}
+18 -89
View File
@@ -1,10 +1,9 @@
import { NextRequest, NextResponse } from "next/server";
import { writeFile, readFile, unlink, mkdir, stat } from "fs/promises";
import { readFile, unlink, stat } from "fs/promises";
import { execFile } from "child_process";
import { promisify } from "util";
import { randomUUID } from "crypto";
import { join } from "path";
import { tmpdir } from "os";
import { parseMultipartUpload } from "../parse-upload";
const execFileAsync = promisify(execFile);
@@ -24,58 +23,13 @@ function qpdfArgs(input: string, output: string): string[] {
];
}
/**
* Extract the file binary from a raw multipart/form-data buffer.
* Finds the part whose Content-Disposition contains `filename=`,
* then returns the bytes between the header-end and the closing boundary.
*/
function extractFileFromMultipart(
raw: Buffer,
boundary: string,
): Buffer | null {
const boundaryBuf = Buffer.from(`--${boundary}`);
const headerSep = Buffer.from("\r\n\r\n");
const crlf = Buffer.from("\r\n");
let searchFrom = 0;
while (searchFrom < raw.length) {
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes("filename=")) {
const fileStart = headerEnd + 4;
const closingMarker = Buffer.from(`\r\n--${boundary}`);
const fileEnd = raw.lastIndexOf(closingMarker);
if (fileEnd > fileStart) {
return raw.subarray(fileStart, fileEnd);
}
return raw.subarray(fileStart);
}
searchFrom = headerEnd + 4;
}
return null;
}
async function cleanup(dir: string) {
try {
const { readdir } = await import("fs/promises");
const { readdir, rmdir } = await import("fs/promises");
const files = await readdir(dir);
for (const f of files) {
await unlink(join(dir, f)).catch(() => {});
}
const { rmdir } = await import("fs/promises");
await rmdir(dir).catch(() => {});
} catch {
// cleanup failure is non-critical
@@ -83,51 +37,27 @@ async function cleanup(dir: string) {
}
export async function POST(req: NextRequest) {
const tmpDir = join(tmpdir(), `pdf-qpdf-${randomUUID()}`);
let tmpDir = "";
try {
await mkdir(tmpDir, { recursive: true });
// Stream upload to disk — works for any file size
const upload = await parseMultipartUpload(req);
tmpDir = upload.tmpDir;
const inputPath = join(tmpDir, "input.pdf");
const outputPath = join(tmpDir, "output.pdf");
const inputPath = upload.filePath;
const outputPath = join(upload.tmpDir, "output.pdf");
const originalSize = upload.size;
if (!req.body) {
if (originalSize < 100) {
return NextResponse.json(
{ error: "Lipsește fișierul PDF." },
{ error: "Fișierul PDF este gol sau prea mic." },
{ status: 400 },
);
}
const rawBuf = Buffer.from(await req.arrayBuffer());
const contentType = req.headers.get("content-type") || "";
const boundaryMatch = contentType.match(
/boundary=(?:"([^"]+)"|([^\s;]+))/,
);
const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
if (!boundary) {
return NextResponse.json(
{ error: "Invalid request — missing multipart boundary." },
{ status: 400 },
);
}
const pdfBuffer = extractFileFromMultipart(rawBuf, boundary);
if (!pdfBuffer || pdfBuffer.length < 100) {
return NextResponse.json(
{ error: "Fișierul PDF este gol sau nu a putut fi extras." },
{ status: 400 },
);
}
await writeFile(inputPath, pdfBuffer);
const originalSize = pdfBuffer.length;
// qpdf: lossless structural optimization — fonts and images untouched
try {
await execFileAsync("qpdf", qpdfArgs(inputPath, outputPath), {
timeout: 120_000,
timeout: 300_000, // 5 min for very large files
maxBuffer: 10 * 1024 * 1024,
});
} catch (qpdfErr) {
@@ -139,7 +69,7 @@ export async function POST(req: NextRequest) {
{ status: 501 },
);
}
// qpdf returns exit code 3 for warnings output is still valid
// qpdf exit code 3 = warnings, output is still valid
const exitCode =
qpdfErr && typeof qpdfErr === "object" && "code" in qpdfErr
? (qpdfErr as { code: number }).code
@@ -153,10 +83,8 @@ export async function POST(req: NextRequest) {
}
// Verify output exists
let outputSize = 0;
try {
const s = await stat(outputPath);
outputSize = s.size;
await stat(outputPath);
} catch {
return NextResponse.json(
{ error: "qpdf nu a produs fișier output." },
@@ -169,7 +97,8 @@ export async function POST(req: NextRequest) {
// If compression made it bigger, return original
if (compressedSize >= originalSize) {
return new NextResponse(new Uint8Array(pdfBuffer), {
const originalBuffer = await readFile(inputPath);
return new NextResponse(new Uint8Array(originalBuffer), {
status: 200,
headers: {
"Content-Type": "application/pdf",
@@ -196,6 +125,6 @@ export async function POST(req: NextRequest) {
{ status: 500 },
);
} finally {
await cleanup(tmpDir);
if (tmpDir) await cleanup(tmpDir);
}
}
+124
View File
@@ -0,0 +1,124 @@
/**
 * Streaming multipart parser for large PDF uploads.
 *
 * Uses `busboy` to stream the file part directly to disk — never buffers the
 * entire request body in memory. Works reliably for files of any size up to
 * the configured limit.
 */
import { NextRequest } from "next/server";
import { createWriteStream } from "fs";
import { mkdir, rm } from "fs/promises";
import { randomUUID } from "crypto";
import { join, basename } from "path";
import { tmpdir } from "os";
import { Readable } from "stream";
import Busboy from "busboy";

/** Maximum accepted upload size in bytes (500 MB). */
const MAX_FILE_SIZE = 500 * 1024 * 1024;

export interface ParsedUpload {
  /** Absolute path to the extracted PDF on disk */
  filePath: string;
  /** Original filename from the upload (sanitized — basename only) */
  filename: string;
  /** File size in bytes */
  size: number;
  /** Temp directory (caller should clean up) */
  tmpDir: string;
  /** Any extra form fields (e.g. "level") */
  fields: Record<string, string>;
}

/**
 * Parse a multipart/form-data request, streaming the file to a temp directory.
 * Returns the path to the extracted file on disk + metadata.
 *
 * Rejects when: the request has no body/content-type, no file part is present,
 * the file exceeds {@link MAX_FILE_SIZE}, or any stream/filesystem error occurs.
 * On rejection the temp directory is removed best-effort, since the caller
 * never receives `tmpDir` in that case.
 */
export function parseMultipartUpload(req: NextRequest): Promise<ParsedUpload> {
  return new Promise((resolve, reject) => {
    const contentType = req.headers.get("content-type");
    if (!contentType || !req.body) {
      return reject(new Error("Lipsește fișierul PDF."));
    }

    const tmpDir = join(tmpdir(), `pdf-upload-${randomUUID()}`);
    const fields: Record<string, string> = {};
    let settled = false;
    // Tracks whether a file part was seen at all, so busboy's "close" (which
    // may fire before the write stream's "finish") doesn't falsely reject.
    let fileSeen = false;

    const fail = (err: Error) => {
      if (settled) return;
      settled = true;
      // Best-effort cleanup: on failure the caller never learns tmpDir.
      void rm(tmpDir, { recursive: true, force: true }).catch(() => {});
      reject(err);
    };
    const succeed = (upload: ParsedUpload) => {
      if (settled) return;
      settled = true;
      resolve(upload);
    };

    const bb = Busboy({
      headers: { "content-type": contentType },
      limits: {
        fileSize: MAX_FILE_SIZE,
        files: 1,
      },
    });

    bb.on("field", (name: string, val: string) => {
      fields[name] = val;
    });

    bb.on(
      "file",
      (
        _name: string,
        file: NodeJS.ReadableStream & { resume(): void },
        info: { filename: string; encoding: string; mimeType: string },
      ) => {
        fileSeen = true;
        // basename() strips any directory components the client may have
        // smuggled in (e.g. "../../evil") — prevents path traversal.
        const filename = basename(info.filename || "input.pdf") || "input.pdf";
        const filePath = join(tmpDir, filename);

        // Ensure temp dir exists, then stream file to disk. busboy applies
        // backpressure while the file stream is unconsumed, so the pending
        // mkdir does not drop data.
        mkdir(tmpDir, { recursive: true })
          .then(() => {
            let size = 0;
            const ws = createWriteStream(filePath);
            file.on("data", (chunk: Buffer) => {
              size += chunk.length;
            });
            // busboy silently truncates at the fileSize limit and emits
            // "limit" — reject instead of processing a truncated PDF.
            file.on("limit", () => {
              ws.destroy();
              fail(new Error("Fișierul depășește limita de 500 MB."));
            });
            file.on("error", (err: Error) => fail(err));
            ws.on("finish", () => {
              succeed({ filePath, filename, size, tmpDir, fields });
            });
            ws.on("error", (err: Error) => fail(err));
            file.pipe(ws);
          })
          .catch((err) => {
            // Drain the part so busboy can finish parsing the request.
            file.resume();
            fail(err instanceof Error ? err : new Error(String(err)));
          });
      },
    );

    bb.on("error", (err: Error) => fail(err));

    bb.on("close", () => {
      // Only an error when no file part was present; if one was, the write
      // stream's "finish" handler may still be about to resolve.
      if (!fileSeen) {
        fail(new Error("Lipsește fișierul PDF din upload."));
      }
    });

    // Pipe the Web ReadableStream into busboy; surface client aborts and
    // other transport errors instead of crashing with an unhandled 'error'.
    const nodeStream = Readable.fromWeb(
      req.body as import("stream/web").ReadableStream,
    );
    nodeStream.on("error", (err: Error) => fail(err));
    nodeStream.pipe(bb);
  });
}