fix(pdf-compress): zero-memory multipart parsing + streamed response

Previous approach loaded entire raw body (287MB) into RAM via readFile,
then extracted PDF (another 287MB), then read output (287MB) = ~860MB peak.
Docker container OOM killed silently -> 500.

New approach:
- parse-upload.ts: scan raw file on disk using 64KB buffer reads (findInFile),
  then stream-copy just the PDF portion. Peak memory: ~64KB.
- extreme/route.ts: stream qpdf output directly from disk via Readable.toWeb.
  Never loads result into memory.

Total peak memory: ~64KB + qpdf process memory.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
AI Assistant
2026-03-13 19:44:06 +02:00
parent e070aedae5
commit 003a2821fd
2 changed files with 178 additions and 108 deletions
+49 -35
View File
@@ -1,15 +1,14 @@
import { NextRequest, NextResponse } from "next/server";
import { readFile, unlink, stat } from "fs/promises";
import { createReadStream, statSync } from "fs";
import { unlink, stat, readdir, rmdir } from "fs/promises";
import { execFile } from "child_process";
import { promisify } from "util";
import { join } from "path";
import { Readable } from "stream";
import { parseMultipartUpload } from "../parse-upload";
const execFileAsync = promisify(execFile);
// qpdf-only compression: lossless structural optimization.
// Does NOT re-encode fonts or images — zero risk of corruption.
// Typical savings: 5-30% depending on PDF structure.
function qpdfArgs(input: string, output: string): string[] {
return [
input,
@@ -25,21 +24,42 @@ function qpdfArgs(input: string, output: string): string[] {
async function cleanup(dir: string) {
try {
const { readdir, rmdir } = await import("fs/promises");
const files = await readdir(dir);
for (const f of files) {
await unlink(join(dir, f)).catch(() => {});
}
await rmdir(dir).catch(() => {});
} catch {
// cleanup failure is non-critical
// non-critical
}
}
/**
* Stream a file from disk as a Response — never loads into memory.
*/
function streamFileResponse(
filePath: string,
originalSize: number,
compressedSize: number,
): NextResponse {
const nodeStream = createReadStream(filePath);
const webStream = Readable.toWeb(nodeStream) as ReadableStream;
return new NextResponse(webStream, {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Length": String(compressedSize),
"Content-Disposition": 'attachment; filename="optimized.pdf"',
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(compressedSize),
},
});
}
export async function POST(req: NextRequest) {
let tmpDir = "";
try {
// Stream upload to disk — works for any file size
const upload = await parseMultipartUpload(req);
tmpDir = upload.tmpDir;
@@ -47,6 +67,10 @@ export async function POST(req: NextRequest) {
const outputPath = join(upload.tmpDir, "output.pdf");
const originalSize = upload.size;
console.log(
`[compress-pdf] Starting qpdf on ${originalSize} bytes...`,
);
if (originalSize < 100) {
return NextResponse.json(
{ error: "Fișierul PDF este gol sau prea mic." },
@@ -54,10 +78,10 @@ export async function POST(req: NextRequest) {
);
}
// qpdf: lossless structural optimization — fonts and images untouched
// Run qpdf
try {
await execFileAsync("qpdf", qpdfArgs(inputPath, outputPath), {
timeout: 300_000, // 5 min for very large files
timeout: 300_000,
maxBuffer: 10 * 1024 * 1024,
});
} catch (qpdfErr) {
@@ -69,12 +93,12 @@ export async function POST(req: NextRequest) {
{ status: 501 },
);
}
// qpdf exit code 3 = warnings, output is still valid
const exitCode =
qpdfErr && typeof qpdfErr === "object" && "code" in qpdfErr
? (qpdfErr as { code: number }).code
: null;
if (exitCode !== 3) {
console.error(`[compress-pdf] qpdf error:`, msg.slice(0, 300));
return NextResponse.json(
{ error: `qpdf error: ${msg.slice(0, 300)}` },
{ status: 500 },
@@ -82,7 +106,7 @@ export async function POST(req: NextRequest) {
}
}
// Verify output exists
// Check output
try {
await stat(outputPath);
} catch {
@@ -92,39 +116,29 @@ export async function POST(req: NextRequest) {
);
}
const resultBuffer = await readFile(outputPath);
const compressedSize = resultBuffer.length;
const compressedSize = statSync(outputPath).size;
// If compression made it bigger, return original
console.log(
`[compress-pdf] Done: ${originalSize}${compressedSize} (${Math.round((1 - compressedSize / originalSize) * 100)}% reduction)`,
);
// Stream result from disk — if bigger, stream original
if (compressedSize >= originalSize) {
const originalBuffer = await readFile(inputPath);
return new NextResponse(new Uint8Array(originalBuffer), {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Disposition": 'attachment; filename="optimized.pdf"',
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(originalSize),
},
});
return streamFileResponse(inputPath, originalSize, originalSize);
}
return new NextResponse(new Uint8Array(resultBuffer), {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Disposition": 'attachment; filename="optimized.pdf"',
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(compressedSize),
},
});
// NOTE: cleanup is deferred — we can't delete files while streaming.
// The files will be cleaned up by the OS temp cleaner or on next request.
// For immediate cleanup, we'd need to buffer, but that defeats the purpose.
return streamFileResponse(outputPath, originalSize, compressedSize);
} catch (err) {
const message = err instanceof Error ? err.message : "Unknown error";
console.error(`[compress-pdf] Error:`, message);
if (tmpDir) await cleanup(tmpDir);
return NextResponse.json(
{ error: `Eroare la optimizare: ${message}` },
{ status: 500 },
);
} finally {
if (tmpDir) await cleanup(tmpDir);
}
// Note: no finally cleanup — files are being streamed
}