refactor(pdf-compress): replace Ghostscript with qpdf + iLovePDF API

Ghostscript -sDEVICE=pdfwrite fundamentally re-encodes fonts, causing
garbled text regardless of parameters. This cannot be fixed by tuning flags.

New approach:
- Local: qpdf-only lossless structural optimization (5-30% savings,
  zero corruption risk — fonts and images completely untouched)
- Cloud: iLovePDF API integration (auth → start → upload → process →
  download) with 3 levels (recommended/extreme/low), proper image
  recompression without font corruption

Frontend: 3 modes (cloud recommended, cloud extreme, local lossless).
Docker: ILOVEPDF_PUBLIC_KEY env var added.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
AI Assistant
2026-03-13 17:50:46 +02:00
parent d75fcb1d1c
commit f5deccd8ea
4 changed files with 358 additions and 203 deletions
+34 -189
View File
@@ -8,81 +8,9 @@ import { tmpdir } from "os";
const execFileAsync = promisify(execFile);
/**
 * Build the Ghostscript argument list for PDF compression.
 *
 * CRITICAL: -dPDFSETTINGS=/screen must NOT be used — it overrides font
 * encoding and produces garbled text. Every parameter is therefore set
 * individually so that only raster IMAGES are recompressed while fonts
 * and text survive untouched.
 *
 * Strategy: re-encode all raster images as JPEG (quality roughly 35-70
 * depending on level), downsample to the level's DPI, deduplicate
 * images, and compress streams. Fonts are never re-encoded.
 */
function gsArgs(
  input: string,
  output: string,
  level: "extreme" | "high" | "balanced",
): string[] {
  // Per-level knobs — these affect images only, never fonts.
  const presets = {
    extreme: { dpi: 100, qfactor: 1.2 }, // ~JPEG quality 35, aggressive
    high: { dpi: 150, qfactor: 0.76 }, // ~JPEG quality 50, good balance
    balanced: { dpi: 200, qfactor: 0.4 }, // ~JPEG quality 70, minimal loss
  };
  const { dpi, qfactor } = presets[level];

  // PostScript distiller dictionary for one image class; the color
  // variants additionally carry /ColorTransform 1.
  const distillerDict = (name: string, withColorTransform: boolean): string => {
    const ct = withColorTransform ? "/ColorTransform 1 " : "";
    return `<< /${name} << /QFactor ${qfactor} /Blend 1 ${ct}/HSamples [2 1 1 2] /VSamples [2 1 1 2] >> >> setdistillerparams`;
  };

  const general = [
    "-sDEVICE=pdfwrite",
    "-dCompatibilityLevel=1.5",
    "-dNOPAUSE",
    "-dBATCH",
    `-sOutputFile=${output}`,
  ];

  // Image recompression — the main size reducer. Turning pass-through off
  // forces GS to re-encode existing JPEG/JPX streams instead of copying them.
  const recompression = [
    "-dPassThroughJPEGImages=false",
    "-dPassThroughJPXImages=false",
    "-dAutoFilterColorImages=false",
    "-dAutoFilterGrayImages=false",
    "-dColorImageFilter=/DCTEncode",
    "-dGrayImageFilter=/DCTEncode",
    "-dEncodeColorImages=true",
    "-dEncodeGrayImages=true",
  ];

  // Downsampling — mono images are kept at 200 DPI minimum to stay legible.
  const downsampling = [
    "-dDownsampleColorImages=true",
    "-dDownsampleGrayImages=true",
    "-dDownsampleMonoImages=true",
    `-dColorImageResolution=${dpi}`,
    `-dGrayImageResolution=${dpi}`,
    `-dMonoImageResolution=${Math.max(dpi, 200)}`,
    "-dColorImageDownsampleType=/Bicubic",
    "-dGrayImageDownsampleType=/Bicubic",
    "-dColorImageDownsampleThreshold=1.0",
    "-dGrayImageDownsampleThreshold=1.0",
    "-dMonoImageDownsampleThreshold=1.0",
  ];

  // Font handling — PRESERVE everything. Subsetting is safe: it keeps the
  // encoding intact while still shrinking size.
  const fonts = [
    "-dSubsetFonts=true",
    "-dEmbedAllFonts=true",
    "-dCompressFonts=true",
  ];

  // Structure/stream optimization; CMYK→RGB saves ~25% on CMYK images.
  const structure = [
    "-dCompressStreams=true",
    "-dDetectDuplicateImages=true",
    "-sColorConversionStrategy=RGB",
  ];

  return [
    ...general,
    ...recompression,
    ...downsampling,
    ...fonts,
    ...structure,
    // JPEG quality dictionaries, applied via inline PostScript.
    "-c",
    distillerDict("ColorACSImageDict", true),
    distillerDict("GrayACSImageDict", false),
    distillerDict("ColorImageDict", true),
    distillerDict("GrayImageDict", false),
    "-f",
    input,
  ];
}
// qpdf args for structure polish (5-15% additional saving)
// qpdf-only compression: lossless structural optimization.
// Does NOT re-encode fonts or images — zero risk of corruption.
// Typical savings: 5-30% depending on PDF structure.
function qpdfArgs(input: string, output: string): string[] {
return [
input,
@@ -115,73 +43,31 @@ function extractFileFromMultipart(
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
// Find end of boundary line
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
// Find blank line separating headers from body
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
// Check if this part has a filename
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes("filename=")) {
const fileStart = headerEnd + 4; // skip \r\n\r\n
const fileStart = headerEnd + 4;
// Find closing boundary — search from end to avoid false matches inside PDF
const closingMarker = Buffer.from(`\r\n--${boundary}`);
const fileEnd = raw.lastIndexOf(closingMarker);
if (fileEnd > fileStart) {
return raw.subarray(fileStart, fileEnd);
}
// Fallback: no closing boundary found, take everything after headers
return raw.subarray(fileStart);
}
// Skip past this part
searchFrom = headerEnd + 4;
}
return null;
}
/**
 * Extract a simple (non-file) text field value from a multipart/form-data body.
 *
 * Walks the parts by boundary marker, skipping file parts (headers carrying
 * `filename=`), and returns the body of the first part whose headers contain
 * `name="<fieldName>"`, read up to the next boundary and trimmed.
 *
 * @param raw       Full raw multipart request body.
 * @param boundary  Boundary token from the Content-Type header (without the leading "--").
 * @param fieldName Form field name to look up.
 * @returns Trimmed field value ("" for a present-but-empty field), or null if not found.
 */
function extractFieldFromMultipart(
  raw: Buffer,
  boundary: string,
  fieldName: string,
): string | null {
  const boundaryBuf = Buffer.from(`--${boundary}`);
  // Hoisted out of the loop — previously re-allocated on every iteration.
  const nextBoundaryBuf = Buffer.from(`\r\n--${boundary}`);
  const headerSep = Buffer.from("\r\n\r\n");
  const crlf = Buffer.from("\r\n");
  const namePattern = `name="${fieldName}"`;
  let searchFrom = 0;
  while (searchFrom < raw.length) {
    const partStart = raw.indexOf(boundaryBuf, searchFrom);
    if (partStart === -1) break;
    // End of the boundary line.
    const lineEnd = raw.indexOf(crlf, partStart);
    if (lineEnd === -1) break;
    // Blank line separating part headers from the part body.
    const headerEnd = raw.indexOf(headerSep, lineEnd);
    if (headerEnd === -1) break;
    const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
    if (headers.includes(namePattern) && !headers.includes("filename=")) {
      const valueStart = headerEnd + 4; // skip \r\n\r\n
      const nextBoundary = raw.indexOf(nextBoundaryBuf, valueStart);
      // ">=" (not ">") so a present-but-empty field yields "" instead of null.
      if (nextBoundary >= valueStart) {
        return raw.subarray(valueStart, nextBoundary).toString("utf8").trim();
      }
    }
    searchFrom = headerEnd + 4;
  }
  return null;
}
async function cleanup(dir: string) {
try {
const { readdir } = await import("fs/promises");
@@ -197,16 +83,13 @@ async function cleanup(dir: string) {
}
export async function POST(req: NextRequest) {
const tmpDir = join(tmpdir(), `pdf-extreme-${randomUUID()}`);
const tmpDir = join(tmpdir(), `pdf-qpdf-${randomUUID()}`);
try {
await mkdir(tmpDir, { recursive: true });
const inputPath = join(tmpDir, "input.pdf");
const gsOutputPath = join(tmpDir, "gs-output.pdf");
const finalOutputPath = join(tmpDir, "final.pdf");
const outputPath = join(tmpDir, "output.pdf");
// Collect raw body via arrayBuffer() — more reliable than formData() for
// large files, and more reliable than Readable.fromWeb streaming to disk.
if (!req.body) {
return NextResponse.json(
{ error: "Lipsește fișierul PDF." },
@@ -216,7 +99,6 @@ export async function POST(req: NextRequest) {
const rawBuf = Buffer.from(await req.arrayBuffer());
// Extract PDF from multipart body
const contentType = req.headers.get("content-type") || "";
const boundaryMatch = contentType.match(
/boundary=(?:"([^"]+)"|([^\s;]+))/,
@@ -239,86 +121,50 @@ export async function POST(req: NextRequest) {
);
}
// Extract compression level from multipart (optional "level" field)
const levelParam = extractFieldFromMultipart(rawBuf, boundary, "level");
const level: "extreme" | "high" | "balanced" =
levelParam === "high" ? "high" :
levelParam === "balanced" ? "balanced" : "extreme";
await writeFile(inputPath, pdfBuffer);
const originalSize = pdfBuffer.length;
// Step 1: Ghostscript — image recompression + downsampling (fonts untouched)
// qpdf: lossless structural optimization — fonts and images untouched
try {
const { stderr } = await execFileAsync(
"gs",
gsArgs(inputPath, gsOutputPath, level),
{
timeout: 300_000, // 5 min for very large files
maxBuffer: 10 * 1024 * 1024, // 10MB stderr buffer
},
);
if (stderr && stderr.includes("Error")) {
console.error("[PDF extreme] GS stderr:", stderr.slice(0, 500));
}
} catch (gsErr) {
await execFileAsync("qpdf", qpdfArgs(inputPath, outputPath), {
timeout: 120_000,
maxBuffer: 10 * 1024 * 1024,
});
} catch (qpdfErr) {
const msg =
gsErr instanceof Error ? gsErr.message : "Ghostscript failed";
qpdfErr instanceof Error ? qpdfErr.message : "qpdf failed";
if (msg.includes("ENOENT") || msg.includes("not found")) {
return NextResponse.json(
{
error:
"Ghostscript nu este instalat pe server. Trebuie adăugat `ghostscript` în Dockerfile.",
},
{ error: "qpdf nu este instalat pe server." },
{ status: 501 },
);
}
// Include stderr in error for debugging
const stderr =
gsErr && typeof gsErr === "object" && "stderr" in gsErr
? String((gsErr as { stderr: unknown }).stderr).slice(0, 300)
: "";
return NextResponse.json(
{
error: `Ghostscript error: ${msg.slice(0, 200)}${stderr ? `${stderr}` : ""}`,
},
{ status: 500 },
);
// qpdf returns exit code 3 for warnings — output is still valid
const exitCode =
qpdfErr && typeof qpdfErr === "object" && "code" in qpdfErr
? (qpdfErr as { code: number }).code
: null;
if (exitCode !== 3) {
return NextResponse.json(
{ error: `qpdf error: ${msg.slice(0, 300)}` },
{ status: 500 },
);
}
}
// Verify GS output is a valid non-empty PDF
let gsSize = 0;
// Verify output exists
let outputSize = 0;
try {
const gsStat = await stat(gsOutputPath);
gsSize = gsStat.size;
const s = await stat(outputPath);
outputSize = s.size;
} catch {
return NextResponse.json(
{ error: "Ghostscript nu a produs fișier output." },
{ error: "qpdf nu a produs fișier output." },
{ status: 500 },
);
}
if (gsSize < 100) {
return NextResponse.json(
{
error: `Ghostscript a produs un fișier gol (${gsSize} bytes). PDF-ul poate conține elemente incompatibile.`,
},
{ status: 500 },
);
}
// Step 2: qpdf — structure optimization + linearization
let finalPath = gsOutputPath;
try {
await execFileAsync("qpdf", qpdfArgs(gsOutputPath, finalOutputPath), {
timeout: 60_000,
});
finalPath = finalOutputPath;
} catch {
// qpdf failed or not installed — GS output is still good
}
const resultBuffer = await readFile(finalPath);
const resultBuffer = await readFile(outputPath);
const compressedSize = resultBuffer.length;
// If compression made it bigger, return original
@@ -327,8 +173,7 @@ export async function POST(req: NextRequest) {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Disposition":
'attachment; filename="compressed-extreme.pdf"',
"Content-Disposition": 'attachment; filename="optimized.pdf"',
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(originalSize),
},
@@ -339,7 +184,7 @@ export async function POST(req: NextRequest) {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Disposition": 'attachment; filename="compressed-extreme.pdf"',
"Content-Disposition": 'attachment; filename="optimized.pdf"',
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(compressedSize),
},
@@ -347,7 +192,7 @@ export async function POST(req: NextRequest) {
} catch (err) {
const message = err instanceof Error ? err.message : "Unknown error";
return NextResponse.json(
{ error: `Eroare la compresia extremă: ${message}` },
{ error: `Eroare la optimizare: ${message}` },
{ status: 500 },
);
} finally {