fix(pdf-compress): fix broken multipart parsing + add body size limit

Extreme mode: replace the fragile manual multipart boundary parsing (which extracted only a fraction of large files, producing empty PDFs) with the standard req.formData(). Add Ghostscript (GS) output validation and stderr capture.
Stirling mode: parse the incoming formData first, then build a fresh FormData for Stirling instead of passing the raw body through (which lost data on large files). Add a 5-minute timeout and original/compressed size response headers (X-Original-Size, X-Compressed-Size).
next.config: set experimental.serverActions.bodySizeLimit to 250 MB.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 16:18:34 +02:00
parent 81c61d8411
commit 194ddf0849
3 changed files with 109 additions and 98 deletions
@@ -2,6 +2,11 @@ import type { NextConfig } from "next";

 const nextConfig: NextConfig = {
  output: 'standalone',
+  experimental: {
+    serverActions: {
+      bodySizeLimit: '250mb',
+    },
+  },
 };

 export default nextConfig;
@@ -1,13 +1,10 @@
 import { NextRequest, NextResponse } from "next/server";
-import { writeFile, readFile, unlink, mkdir, stat } from "fs/promises";
-import { createWriteStream } from "fs";
+import { writeFile, readFile, unlink, mkdir } from "fs/promises";
 import { execFile } from "child_process";
 import { promisify } from "util";
 import { randomUUID } from "crypto";
 import { join } from "path";
 import { tmpdir } from "os";
-import { Readable } from "stream";
-import { pipeline } from "stream/promises";

 const execFileAsync = promisify(execFile);

@@ -20,7 +17,6 @@ function gsArgs(input: string, output: string): string[] {
    "-dCompatibilityLevel=1.5",
    "-dNOPAUSE",
    "-dBATCH",
-    "-dQUIET",
    `-sOutputFile=${output}`,
    "-dPDFSETTINGS=/screen",
    // Force recompression of ALL images (the #1 key to matching iLovePDF)
@@ -100,105 +96,59 @@ async function cleanup(dir: string) {
 export async function POST(req: NextRequest) {
  const tmpDir = join(tmpdir(), `pdf-extreme-${randomUUID()}`);
  try {
-    // Stream body directly to temp file — avoids req.formData() size limit
-    // that causes "Failed to parse body as FormData" on large files
    await mkdir(tmpDir, { recursive: true });

-    const rawPath = join(tmpDir, "raw-upload");
    const inputPath = join(tmpDir, "input.pdf");
    const gsOutputPath = join(tmpDir, "gs-output.pdf");
    const finalOutputPath = join(tmpDir, "final.pdf");

-    if (!req.body) {
+    // Use standard formData() — works reliably for large files in Next.js 16
+    let pdfBuffer: Buffer;
+    try {
+      const formData = await req.formData();
+      const fileField = formData.get("fileInput");
+      if (!fileField || !(fileField instanceof Blob)) {
        return NextResponse.json(
          { error: "Lipsește fișierul PDF." },
          { status: 400 },
        );
      }
-
-    // Write the raw multipart body to disk
-    const nodeStream = Readable.fromWeb(req.body as import("stream/web").ReadableStream);
-    await pipeline(nodeStream, createWriteStream(rawPath));
-
-    // Extract the PDF binary from the multipart body.
-    // Multipart format:
-    //   --boundary\r\n
-    //   Content-Disposition: form-data; name="fileInput"; filename="x.pdf"\r\n
-    //   Content-Type: application/pdf\r\n
-    //   \r\n
-    //   <FILE BYTES>
-    //   \r\n--boundary--\r\n
-    const rawBuf = await readFile(rawPath);
-
-    const contentType = req.headers.get("content-type") || "";
-    const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^\s;]+))/);
-    const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
-
-    if (!boundary) {
+      pdfBuffer = Buffer.from(await fileField.arrayBuffer());
+    } catch (parseErr) {
+      const msg =
+        parseErr instanceof Error ? parseErr.message : "Parse error";
      return NextResponse.json(
-        { error: "Lipsește fișierul PDF." },
+        { error: `Nu s-a putut citi fișierul: ${msg}` },
        { status: 400 },
      );
    }

-    const boundaryBuf = Buffer.from(`--${boundary}`);
-
-    // Find the part that contains a filename (the file upload part).
-    // There may be multiple parts — we need the one with "filename=".
-    let fileStart = -1;
-    let searchFrom = 0;
-
-    while (searchFrom < rawBuf.length) {
-      const partStart = rawBuf.indexOf(boundaryBuf, searchFrom);
-      if (partStart === -1) break;
-
-      // Skip past boundary line to get to headers
-      const headersStart = rawBuf.indexOf(Buffer.from("\r\n"), partStart);
-      if (headersStart === -1) break;
-
-      const headerEnd = rawBuf.indexOf(Buffer.from("\r\n\r\n"), headersStart);
-      if (headerEnd === -1) break;
-
-      // Check if this part's headers contain a filename
-      const headers = rawBuf.subarray(headersStart, headerEnd).toString("utf8");
-      if (headers.includes("filename=")) {
-        fileStart = headerEnd + 4; // skip \r\n\r\n
-        break;
-      }
-
-      // Move past this part's headers to search for next boundary
-      searchFrom = headerEnd + 4;
-    }
-
-    if (fileStart === -1) {
+    if (pdfBuffer.length < 100) {
      return NextResponse.json(
-        { error: "Lipsește fișierul PDF." },
+        { error: "Fișierul PDF este gol sau prea mic." },
        { status: 400 },
      );
    }

-    // Find the closing boundary after the file content.
-    // Search from the END of the buffer backwards, since the PDF binary
-    // could theoretically contain the boundary string by coincidence.
-    const closingMarker = Buffer.from(`\r\n--${boundary}`);
-    const fileEnd = rawBuf.lastIndexOf(closingMarker);
-    const pdfData = (fileEnd > fileStart)
-      ? rawBuf.subarray(fileStart, fileEnd)
-      : rawBuf.subarray(fileStart);
-
-    await writeFile(inputPath, pdfData);
-    const originalSize = pdfData.length;
-
-    // Clean up raw file early to free disk space
-    await unlink(rawPath).catch(() => {});
+    await writeFile(inputPath, pdfBuffer);
+    const originalSize = pdfBuffer.length;

    // Step 1: Ghostscript — aggressive image recompression + downsampling
    try {
-      await execFileAsync("gs", gsArgs(inputPath, gsOutputPath), {
-        timeout: 120_000,
-      });
+      const { stderr } = await execFileAsync(
+        "gs",
+        gsArgs(inputPath, gsOutputPath),
+        {
+          timeout: 300_000, // 5 min for very large files
+          maxBuffer: 10 * 1024 * 1024, // 10MB stderr buffer
+        },
+      );
+      if (stderr && stderr.includes("Error")) {
+        console.error("[PDF extreme] GS stderr:", stderr.slice(0, 500));
+      }
    } catch (gsErr) {
-      const msg = gsErr instanceof Error ? gsErr.message : "Ghostscript failed";
+      const msg =
+        gsErr instanceof Error ? gsErr.message : "Ghostscript failed";
      if (msg.includes("ENOENT") || msg.includes("not found")) {
        return NextResponse.json(
          {
@@ -208,8 +158,38 @@ export async function POST(req: NextRequest) {
          { status: 501 },
        );
      }
+      // Include stderr in error for debugging
+      const stderr =
+        gsErr && typeof gsErr === "object" && "stderr" in gsErr
+          ? String((gsErr as { stderr: unknown }).stderr).slice(0, 300)
+          : "";
      return NextResponse.json(
-        { error: `Ghostscript error: ${msg}` },
+        {
+          error: `Ghostscript error: ${msg.slice(0, 200)}${stderr ? ` — ${stderr}` : ""}`,
+        },
+        { status: 500 },
+      );
+    }
+
+    // Verify GS output is a valid non-empty PDF
+    let gsSize = 0;
+    try {
+      const gsStat = await import("fs/promises").then((fs) =>
+        fs.stat(gsOutputPath),
+      );
+      gsSize = gsStat.size;
+    } catch {
+      return NextResponse.json(
+        { error: "Ghostscript nu a produs fișier output." },
+        { status: 500 },
+      );
+    }
+
+    if (gsSize < 100) {
+      return NextResponse.json(
+        {
+          error: `Ghostscript a produs un fișier gol (${gsSize} bytes). PDF-ul poate conține elemente incompatibile.`,
+        },
        { status: 500 },
      );
    }
@@ -218,7 +198,7 @@ export async function POST(req: NextRequest) {
    let finalPath = gsOutputPath;
    try {
      await execFileAsync("qpdf", qpdfArgs(gsOutputPath, finalOutputPath), {
-        timeout: 30_000,
+        timeout: 60_000,
      });
      finalPath = finalOutputPath;
    } catch {
@@ -230,8 +210,7 @@ export async function POST(req: NextRequest) {

    // If compression made it bigger, return original
    if (compressedSize >= originalSize) {
-      const originalBuffer = await readFile(inputPath);
-      return new NextResponse(originalBuffer, {
+      return new NextResponse(new Uint8Array(pdfBuffer), {
        status: 200,
        headers: {
          "Content-Type": "application/pdf",
@@ -243,7 +222,7 @@ export async function POST(req: NextRequest) {
      });
    }

-    return new NextResponse(resultBuffer, {
+    return new NextResponse(new Uint8Array(resultBuffer), {
      status: 200,
      headers: {
        "Content-Type": "application/pdf",
@@ -7,17 +7,42 @@ const STIRLING_PDF_API_KEY =

 export async function POST(req: NextRequest) {
  try {
-    // Stream body directly to Stirling — avoids FormData re-serialization
-    // failure on large files ("Failed to parse body as FormData")
+    // Parse incoming form data — get file + optimizeLevel
+    let formData: FormData;
+    try {
+      formData = await req.formData();
+    } catch (parseErr) {
+      const msg =
+        parseErr instanceof Error ? parseErr.message : "Parse error";
+      return NextResponse.json(
+        { error: `Nu s-a putut citi formularul: ${msg}` },
+        { status: 400 },
+      );
+    }
+
+    const fileField = formData.get("fileInput");
+    if (!fileField || !(fileField instanceof Blob)) {
+      return NextResponse.json(
+        { error: "Lipsește fișierul PDF." },
+        { status: 400 },
+      );
+    }
+
+    const optimizeLevel = formData.get("optimizeLevel") ?? "3";
+    const originalSize = fileField.size;
+
+    // Build fresh FormData for Stirling
+    const stirlingForm = new FormData();
+    stirlingForm.append("fileInput", fileField, "input.pdf");
+    stirlingForm.append("optimizeLevel", String(optimizeLevel));
+
    const res = await fetch(`${STIRLING_PDF_URL}/api/v1/misc/compress-pdf`, {
      method: "POST",
      headers: {
        "X-API-KEY": STIRLING_PDF_API_KEY,
-        "Content-Type": req.headers.get("content-type") || "",
      },
-      body: req.body,
-      // @ts-expect-error duplex required for streaming request bodies in Node
-      duplex: "half",
+      body: stirlingForm,
+      signal: AbortSignal.timeout(300_000), // 5 min for large files
    });

    if (!res.ok) {
@@ -36,6 +61,8 @@ export async function POST(req: NextRequest) {
      headers: {
        "Content-Type": "application/pdf",
        "Content-Disposition": 'attachment; filename="compressed.pdf"',
+        "X-Original-Size": String(originalSize),
+        "X-Compressed-Size": String(buffer.length),
      },
    });
  } catch (err) {