refactor(pdf-compress): replace Ghostscript with qpdf + iLovePDF API

Ghostscript -sDEVICE=pdfwrite fundamentally re-encodes fonts, causing
garbled text regardless of parameters. This cannot be fixed.

New approach:
- Local: qpdf-only lossless structural optimization (5-30% savings,
  zero corruption risk — fonts and images completely untouched)
- Cloud: iLovePDF API integration (auth → start → upload → process →
  download) with 3 levels (recommended/extreme/low), proper image
  recompression without font corruption

Frontend: 3 modes (cloud recommended, cloud extreme, local lossless).
Docker: ILOVEPDF_PUBLIC_KEY env var added.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
AI Assistant
2026-03-13 17:50:46 +02:00
parent d75fcb1d1c
commit f5deccd8ea
4 changed files with 358 additions and 203 deletions
+2
View File
@@ -44,6 +44,8 @@ services:
# eTerra ANCPI (parcel-sync module)
- ETERRA_USERNAME=${ETERRA_USERNAME:-}
- ETERRA_PASSWORD=${ETERRA_PASSWORD:-}
# iLovePDF cloud compression (free: 250 files/month)
- ILOVEPDF_PUBLIC_KEY=${ILOVEPDF_PUBLIC_KEY:-}
# DWG-to-DXF sidecar
- DWG2DXF_URL=http://dwg2dxf:5001
# Email notifications (Brevo SMTP)
+293
View File
@@ -0,0 +1,293 @@
import { NextRequest, NextResponse } from "next/server";
/**
* iLovePDF API integration for PDF compression.
*
* Workflow: auth → start → upload → process → download
* Docs: https://www.iloveapi.com/docs/api-reference
*
* Env vars: ILOVEPDF_PUBLIC_KEY, ILOVEPDF_SECRET_KEY
* Free tier: 250 files/month
*/
const ILOVEPDF_PUBLIC_KEY = process.env.ILOVEPDF_PUBLIC_KEY ?? "";
const API_BASE = "https://api.ilovepdf.com/v1";
/**
* Extract the file binary from a raw multipart/form-data buffer.
*/
/**
 * Extract the first file part (binary payload + original filename) from a
 * raw multipart/form-data buffer.
 *
 * Operates on the raw Buffer instead of request.formData() so large uploads
 * avoid intermediate Web-API conversions.
 *
 * @param raw      Complete request body.
 * @param boundary Multipart boundary (without the leading "--").
 * @returns File bytes and filename, or null when no file part is present.
 */
function extractFileFromMultipart(
  raw: Buffer,
  boundary: string,
): { buffer: Buffer; filename: string } | null {
  const boundaryBuf = Buffer.from(`--${boundary}`);
  const headerSep = Buffer.from("\r\n\r\n");
  const crlf = Buffer.from("\r\n");
  let searchFrom = 0;
  while (searchFrom < raw.length) {
    const partStart = raw.indexOf(boundaryBuf, searchFrom);
    if (partStart === -1) break;
    const lineEnd = raw.indexOf(crlf, partStart);
    if (lineEnd === -1) break;
    const headerEnd = raw.indexOf(headerSep, lineEnd);
    if (headerEnd === -1) break;
    const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
    if (headers.includes("filename=")) {
      const fileStart = headerEnd + 4; // skip the \r\n\r\n header separator
      // Extract original filename
      const fnMatch = headers.match(/filename="([^"]+)"/);
      const filename = fnMatch?.[1] ?? "input.pdf";
      // RFC 2046 guarantees the boundary never occurs inside a part's body,
      // so the FIRST boundary after the file content delimits it. (The old
      // lastIndexOf approach matched the final closing boundary instead,
      // which appended any trailing parts — e.g. the "level" field — to the
      // extracted PDF bytes.)
      const closingMarker = Buffer.from(`\r\n--${boundary}`);
      const fileEnd = raw.indexOf(closingMarker, fileStart);
      const buffer =
        fileEnd > fileStart
          ? raw.subarray(fileStart, fileEnd)
          : raw.subarray(fileStart); // malformed body: no closing boundary
      return { buffer, filename };
    }
    searchFrom = headerEnd + 4;
  }
  return null;
}
/**
* Extract a text field value from multipart body.
*/
/**
 * Read a plain text field (a non-file part) out of a multipart/form-data body.
 *
 * @param raw       Complete request body.
 * @param boundary  Multipart boundary (without the leading "--").
 * @param fieldName Form field name to look up.
 * @returns The trimmed field value, or null when the field is absent.
 */
function extractFieldFromMultipart(
  raw: Buffer,
  boundary: string,
  fieldName: string,
): string | null {
  const delimiter = Buffer.from(`--${boundary}`);
  const blankLine = Buffer.from("\r\n\r\n");
  const lineBreak = Buffer.from("\r\n");
  const wanted = `name="${fieldName}"`;
  let cursor = 0;
  while (cursor < raw.length) {
    // Locate the next part and the end of its header section.
    const partAt = raw.indexOf(delimiter, cursor);
    if (partAt === -1) return null;
    const eol = raw.indexOf(lineBreak, partAt);
    if (eol === -1) return null;
    const headersEnd = raw.indexOf(blankLine, eol);
    if (headersEnd === -1) return null;
    const headerText = raw.subarray(eol + 2, headersEnd).toString("utf8");
    const bodyAt = headersEnd + 4;
    // Match by field name, but never a file part.
    if (headerText.includes(wanted) && !headerText.includes("filename=")) {
      const next = raw.indexOf(Buffer.from(`\r\n--${boundary}`), bodyAt);
      if (next > bodyAt) {
        return raw.subarray(bodyAt, next).toString("utf8").trim();
      }
    }
    cursor = bodyAt;
  }
  return null;
}
/**
 * POST handler: compress an uploaded PDF through the iLovePDF cloud API.
 *
 * Body: multipart/form-data with one file part plus an optional "level"
 * text field ("extreme" | "low"; anything else falls back to "recommended").
 *
 * Success: the compressed PDF binary with X-Original-Size /
 * X-Compressed-Size headers. Failure: JSON { error } with status
 * 400 (bad input), 501 (key not configured), 502 (upstream error) or 500.
 */
export async function POST(req: NextRequest) {
  if (!ILOVEPDF_PUBLIC_KEY) {
    return NextResponse.json(
      {
        error:
          "iLovePDF nu este configurat. Setează ILOVEPDF_PUBLIC_KEY în variabilele de mediu.",
      },
      { status: 501 },
    );
  }
  try {
    if (!req.body) {
      return NextResponse.json(
        { error: "Lipsește fișierul PDF." },
        { status: 400 },
      );
    }
    // Read the whole body and parse the multipart envelope manually — more
    // reliable than req.formData() for large uploads.
    const rawBuf = Buffer.from(await req.arrayBuffer());
    const contentType = req.headers.get("content-type") || "";
    const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^\s;]+))/);
    const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
    if (!boundary) {
      return NextResponse.json({ error: "Invalid request." }, { status: 400 });
    }
    const fileData = extractFileFromMultipart(rawBuf, boundary);
    // Anything under 100 bytes cannot be a real PDF.
    if (!fileData || fileData.buffer.length < 100) {
      return NextResponse.json(
        { error: "Fișierul PDF nu a putut fi extras." },
        { status: 400 },
      );
    }
    // Optional compression level (extreme / recommended / low).
    const levelParam = extractFieldFromMultipart(rawBuf, boundary, "level");
    const compressionLevel =
      levelParam === "extreme"
        ? "extreme"
        : levelParam === "low"
          ? "low"
          : "recommended";
    const originalSize = fileData.buffer.length;
    // The filename is client-controlled; strip characters that could break
    // or inject into the Content-Disposition response header (quotes, CR/LF).
    const safeFilename =
      fileData.filename.replace(/[^\w .()\-\[\]]+/g, "_") || "input.pdf";

    // Step 1: Authenticate — public key is exchanged for a short-lived token.
    const authRes = await fetch(`${API_BASE}/auth`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ public_key: ILOVEPDF_PUBLIC_KEY }),
      signal: AbortSignal.timeout(30_000),
    });
    if (!authRes.ok) {
      const text = await authRes.text().catch(() => "");
      return NextResponse.json(
        { error: `iLovePDF auth failed: ${authRes.status} ${text}` },
        { status: 502 },
      );
    }
    const { token } = (await authRes.json()) as { token: string };

    // Step 2: Start a compress task — returns the worker server + task id.
    const startRes = await fetch(`${API_BASE}/start/compress`, {
      method: "GET",
      headers: { Authorization: `Bearer ${token}` },
      signal: AbortSignal.timeout(30_000),
    });
    if (!startRes.ok) {
      const text = await startRes.text().catch(() => "");
      return NextResponse.json(
        { error: `iLovePDF start failed: ${startRes.status} ${text}` },
        { status: 502 },
      );
    }
    const { server, task } = (await startRes.json()) as {
      server: string;
      task: string;
    };

    // Step 3: Upload the PDF to the assigned worker server.
    const uploadForm = new FormData();
    uploadForm.append("task", task);
    uploadForm.append(
      "file",
      new Blob([new Uint8Array(fileData.buffer)], { type: "application/pdf" }),
      safeFilename,
    );
    const uploadRes = await fetch(`https://${server}/v1/upload`, {
      method: "POST",
      headers: { Authorization: `Bearer ${token}` },
      body: uploadForm,
      signal: AbortSignal.timeout(300_000), // 5 min for large files
    });
    if (!uploadRes.ok) {
      const text = await uploadRes.text().catch(() => "");
      return NextResponse.json(
        { error: `iLovePDF upload failed: ${uploadRes.status} ${text}` },
        { status: 502 },
      );
    }
    const { server_filename } = (await uploadRes.json()) as {
      server_filename: string;
    };

    // Step 4: Run the compression.
    const processRes = await fetch(`https://${server}/v1/process`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${token}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        task,
        tool: "compress",
        compression_level: compressionLevel,
        files: [
          {
            server_filename,
            filename: safeFilename,
          },
        ],
      }),
      signal: AbortSignal.timeout(300_000),
    });
    if (!processRes.ok) {
      const text = await processRes.text().catch(() => "");
      return NextResponse.json(
        { error: `iLovePDF process failed: ${processRes.status} ${text}` },
        { status: 502 },
      );
    }

    // Step 5: Download the compressed result.
    const downloadRes = await fetch(`https://${server}/v1/download/${task}`, {
      headers: { Authorization: `Bearer ${token}` },
      signal: AbortSignal.timeout(300_000),
    });
    if (!downloadRes.ok) {
      const text = await downloadRes.text().catch(() => "");
      return NextResponse.json(
        {
          error: `iLovePDF download failed: ${downloadRes.status} ${text}`,
        },
        { status: 502 },
      );
    }
    const resultBuffer = Buffer.from(await downloadRes.arrayBuffer());
    const compressedSize = resultBuffer.length;

    // Fire-and-forget task cleanup on the iLovePDF side; failure is harmless.
    void fetch(`https://${server}/v1/task/${task}`, {
      method: "DELETE",
      headers: { Authorization: `Bearer ${token}` },
    }).catch(() => {});

    return new NextResponse(new Uint8Array(resultBuffer), {
      status: 200,
      headers: {
        "Content-Type": "application/pdf",
        "Content-Disposition": `attachment; filename="${safeFilename.replace(/\.pdf$/i, "-comprimat.pdf")}"`,
        "X-Original-Size": String(originalSize),
        "X-Compressed-Size": String(compressedSize),
      },
    });
  } catch (err) {
    const message = err instanceof Error ? err.message : "Unknown error";
    return NextResponse.json(
      { error: `Eroare iLovePDF: ${message}` },
      { status: 500 },
    );
  }
}
+31 -186
View File
@@ -8,81 +8,9 @@ import { tmpdir } from "os";
const execFileAsync = promisify(execFile);
// Ghostscript args for PDF compression.
//
// CRITICAL: Do NOT use -dPDFSETTINGS=/screen — it overrides font encoding
// and produces garbled text. Instead, set each parameter individually so we
// only compress IMAGES while keeping fonts and text intact.
//
// Strategy: recompress all raster images to JPEG at quality ~40-50,
// downsample to 150 DPI, deduplicate, compress streams. Fonts untouched.
function gsArgs(
input: string,
output: string,
level: "extreme" | "high" | "balanced",
): string[] {
// Quality presets — only affect images, never fonts
const presets = {
extreme: { dpi: 100, qfactor: 1.2 }, // ~quality 35, aggressive
high: { dpi: 150, qfactor: 0.76 }, // ~quality 50, good balance
balanced: { dpi: 200, qfactor: 0.4 }, // ~quality 70, minimal loss
};
const { dpi, qfactor } = presets[level];
return [
"-sDEVICE=pdfwrite",
"-dCompatibilityLevel=1.5",
"-dNOPAUSE",
"-dBATCH",
`-sOutputFile=${output}`,
// ── Image recompression (the main size reducer) ──
// Force re-encode of existing JPEGs — without this, GS passes them through
"-dPassThroughJPEGImages=false",
"-dPassThroughJPXImages=false",
// Use DCT (JPEG) for all color/gray images
"-dAutoFilterColorImages=false",
"-dAutoFilterGrayImages=false",
"-dColorImageFilter=/DCTEncode",
"-dGrayImageFilter=/DCTEncode",
"-dEncodeColorImages=true",
"-dEncodeGrayImages=true",
// ── Downsampling ──
"-dDownsampleColorImages=true",
"-dDownsampleGrayImages=true",
"-dDownsampleMonoImages=true",
`-dColorImageResolution=${dpi}`,
`-dGrayImageResolution=${dpi}`,
`-dMonoImageResolution=${Math.max(dpi, 200)}`, // mono needs higher DPI
"-dColorImageDownsampleType=/Bicubic",
"-dGrayImageDownsampleType=/Bicubic",
"-dColorImageDownsampleThreshold=1.0",
"-dGrayImageDownsampleThreshold=1.0",
"-dMonoImageDownsampleThreshold=1.0",
// ── Font handling — PRESERVE everything ──
"-dSubsetFonts=true", // subset is safe — keeps encoding, reduces size
"-dEmbedAllFonts=true", // ensure all fonts stay embedded
"-dCompressFonts=true",
// ── Structure / stream optimization ──
"-dCompressStreams=true",
"-dDetectDuplicateImages=true",
"-sColorConversionStrategy=RGB", // CMYK→RGB saves ~25% on CMYK images
// ── JPEG quality dictionaries ──
"-c",
`<< /ColorACSImageDict << /QFactor ${qfactor} /Blend 1 /ColorTransform 1 /HSamples [2 1 1 2] /VSamples [2 1 1 2] >> >> setdistillerparams`,
`<< /GrayACSImageDict << /QFactor ${qfactor} /Blend 1 /HSamples [2 1 1 2] /VSamples [2 1 1 2] >> >> setdistillerparams`,
`<< /ColorImageDict << /QFactor ${qfactor} /Blend 1 /ColorTransform 1 /HSamples [2 1 1 2] /VSamples [2 1 1 2] >> >> setdistillerparams`,
`<< /GrayImageDict << /QFactor ${qfactor} /Blend 1 /HSamples [2 1 1 2] /VSamples [2 1 1 2] >> >> setdistillerparams`,
"-f",
input,
];
}
// qpdf args for structure polish (5-15% additional saving)
// qpdf-only compression: lossless structural optimization.
// Does NOT re-encode fonts or images — zero risk of corruption.
// Typical savings: 5-30% depending on PDF structure.
function qpdfArgs(input: string, output: string): string[] {
return [
input,
@@ -115,73 +43,31 @@ function extractFileFromMultipart(
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
// Find end of boundary line
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
// Find blank line separating headers from body
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
// Check if this part has a filename
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes("filename=")) {
const fileStart = headerEnd + 4; // skip \r\n\r\n
const fileStart = headerEnd + 4;
// Find closing boundary — search from end to avoid false matches inside PDF
const closingMarker = Buffer.from(`\r\n--${boundary}`);
const fileEnd = raw.lastIndexOf(closingMarker);
if (fileEnd > fileStart) {
return raw.subarray(fileStart, fileEnd);
}
// Fallback: no closing boundary found, take everything after headers
return raw.subarray(fileStart);
}
// Skip past this part
searchFrom = headerEnd + 4;
}
return null;
}
/**
* Extract a simple text field value from a multipart body.
* Returns null if the field is not found.
*/
function extractFieldFromMultipart(
raw: Buffer,
boundary: string,
fieldName: string,
): string | null {
const boundaryBuf = Buffer.from(`--${boundary}`);
const headerSep = Buffer.from("\r\n\r\n");
const crlf = Buffer.from("\r\n");
const namePattern = `name="${fieldName}"`;
let searchFrom = 0;
while (searchFrom < raw.length) {
const partStart = raw.indexOf(boundaryBuf, searchFrom);
if (partStart === -1) break;
const lineEnd = raw.indexOf(crlf, partStart);
if (lineEnd === -1) break;
const headerEnd = raw.indexOf(headerSep, lineEnd);
if (headerEnd === -1) break;
const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8");
if (headers.includes(namePattern) && !headers.includes("filename=")) {
const valueStart = headerEnd + 4;
const nextBoundary = raw.indexOf(Buffer.from(`\r\n--${boundary}`), valueStart);
if (nextBoundary > valueStart) {
return raw.subarray(valueStart, nextBoundary).toString("utf8").trim();
}
}
searchFrom = headerEnd + 4;
}
return null;
}
async function cleanup(dir: string) {
try {
const { readdir } = await import("fs/promises");
@@ -197,16 +83,13 @@ async function cleanup(dir: string) {
}
export async function POST(req: NextRequest) {
const tmpDir = join(tmpdir(), `pdf-extreme-${randomUUID()}`);
const tmpDir = join(tmpdir(), `pdf-qpdf-${randomUUID()}`);
try {
await mkdir(tmpDir, { recursive: true });
const inputPath = join(tmpDir, "input.pdf");
const gsOutputPath = join(tmpDir, "gs-output.pdf");
const finalOutputPath = join(tmpDir, "final.pdf");
const outputPath = join(tmpDir, "output.pdf");
// Collect raw body via arrayBuffer() — more reliable than formData() for
// large files, and more reliable than Readable.fromWeb streaming to disk.
if (!req.body) {
return NextResponse.json(
{ error: "Lipsește fișierul PDF." },
@@ -216,7 +99,6 @@ export async function POST(req: NextRequest) {
const rawBuf = Buffer.from(await req.arrayBuffer());
// Extract PDF from multipart body
const contentType = req.headers.get("content-type") || "";
const boundaryMatch = contentType.match(
/boundary=(?:"([^"]+)"|([^\s;]+))/,
@@ -239,86 +121,50 @@ export async function POST(req: NextRequest) {
);
}
// Extract compression level from multipart (optional "level" field)
const levelParam = extractFieldFromMultipart(rawBuf, boundary, "level");
const level: "extreme" | "high" | "balanced" =
levelParam === "high" ? "high" :
levelParam === "balanced" ? "balanced" : "extreme";
await writeFile(inputPath, pdfBuffer);
const originalSize = pdfBuffer.length;
// Step 1: Ghostscript — image recompression + downsampling (fonts untouched)
// qpdf: lossless structural optimization — fonts and images untouched
try {
const { stderr } = await execFileAsync(
"gs",
gsArgs(inputPath, gsOutputPath, level),
{
timeout: 300_000, // 5 min for very large files
maxBuffer: 10 * 1024 * 1024, // 10MB stderr buffer
},
);
if (stderr && stderr.includes("Error")) {
console.error("[PDF extreme] GS stderr:", stderr.slice(0, 500));
}
} catch (gsErr) {
await execFileAsync("qpdf", qpdfArgs(inputPath, outputPath), {
timeout: 120_000,
maxBuffer: 10 * 1024 * 1024,
});
} catch (qpdfErr) {
const msg =
gsErr instanceof Error ? gsErr.message : "Ghostscript failed";
qpdfErr instanceof Error ? qpdfErr.message : "qpdf failed";
if (msg.includes("ENOENT") || msg.includes("not found")) {
return NextResponse.json(
{
error:
"Ghostscript nu este instalat pe server. Trebuie adăugat `ghostscript` în Dockerfile.",
},
{ error: "qpdf nu este instalat pe server." },
{ status: 501 },
);
}
// Include stderr in error for debugging
const stderr =
gsErr && typeof gsErr === "object" && "stderr" in gsErr
? String((gsErr as { stderr: unknown }).stderr).slice(0, 300)
: "";
// qpdf returns exit code 3 for warnings — output is still valid
const exitCode =
qpdfErr && typeof qpdfErr === "object" && "code" in qpdfErr
? (qpdfErr as { code: number }).code
: null;
if (exitCode !== 3) {
return NextResponse.json(
{
error: `Ghostscript error: ${msg.slice(0, 200)}${stderr ? `${stderr}` : ""}`,
},
{ error: `qpdf error: ${msg.slice(0, 300)}` },
{ status: 500 },
);
}
}
// Verify GS output is a valid non-empty PDF
let gsSize = 0;
// Verify output exists
let outputSize = 0;
try {
const gsStat = await stat(gsOutputPath);
gsSize = gsStat.size;
const s = await stat(outputPath);
outputSize = s.size;
} catch {
return NextResponse.json(
{ error: "Ghostscript nu a produs fișier output." },
{ error: "qpdf nu a produs fișier output." },
{ status: 500 },
);
}
if (gsSize < 100) {
return NextResponse.json(
{
error: `Ghostscript a produs un fișier gol (${gsSize} bytes). PDF-ul poate conține elemente incompatibile.`,
},
{ status: 500 },
);
}
// Step 2: qpdf — structure optimization + linearization
let finalPath = gsOutputPath;
try {
await execFileAsync("qpdf", qpdfArgs(gsOutputPath, finalOutputPath), {
timeout: 60_000,
});
finalPath = finalOutputPath;
} catch {
// qpdf failed or not installed — GS output is still good
}
const resultBuffer = await readFile(finalPath);
const resultBuffer = await readFile(outputPath);
const compressedSize = resultBuffer.length;
// If compression made it bigger, return original
@@ -327,8 +173,7 @@ export async function POST(req: NextRequest) {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Disposition":
'attachment; filename="compressed-extreme.pdf"',
"Content-Disposition": 'attachment; filename="optimized.pdf"',
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(originalSize),
},
@@ -339,7 +184,7 @@ export async function POST(req: NextRequest) {
status: 200,
headers: {
"Content-Type": "application/pdf",
"Content-Disposition": 'attachment; filename="compressed-extreme.pdf"',
"Content-Disposition": 'attachment; filename="optimized.pdf"',
"X-Original-Size": String(originalSize),
"X-Compressed-Size": String(compressedSize),
},
@@ -347,7 +192,7 @@ export async function POST(req: NextRequest) {
} catch (err) {
const message = err instanceof Error ? err.message : "Unknown error";
return NextResponse.json(
{ error: `Eroare la compresia extremă: ${message}` },
{ error: `Eroare la optimizare: ${message}` },
{ status: 500 },
);
} finally {
@@ -1638,7 +1638,7 @@ function formatBytes(bytes: number) {
function PdfReducer() {
const [file, setFile] = useState<File | null>(null);
const [mode, setMode] = useState<"extreme" | "max" | "balanced">("extreme");
const [mode, setMode] = useState<"cloud-extreme" | "cloud-recommended" | "local">("cloud-recommended");
const [loading, setLoading] = useState(false);
const [error, setError] = useState("");
const [result, setResult] = useState<{
@@ -1662,10 +1662,21 @@ function PdfReducer() {
try {
const formData = new FormData();
formData.append("fileInput", file);
// All modes use the GS endpoint with a level parameter
formData.append("level", mode === "extreme" ? "extreme" : mode === "max" ? "high" : "balanced");
const res = await fetch("/api/compress-pdf/extreme", {
let endpoint: string;
if (mode === "local") {
// qpdf-only: lossless structural optimization
endpoint = "/api/compress-pdf/extreme";
} else {
// iLovePDF cloud compression
endpoint = "/api/compress-pdf/cloud";
formData.append(
"level",
mode === "cloud-extreme" ? "extreme" : "recommended",
);
}
const res = await fetch(endpoint, {
method: "POST",
body: formData,
});
@@ -1755,22 +1766,26 @@ function PdfReducer() {
<select
value={mode}
onChange={(e) =>
setMode(e.target.value as "extreme" | "max" | "balanced")
setMode(e.target.value as "cloud-extreme" | "cloud-recommended" | "local")
}
className="mt-1 w-full rounded-md border bg-background px-3 py-2 text-sm"
>
<option value="extreme">
Extremă imagini 100 DPI, calitate scăzută
<option value="cloud-recommended">
iLovePDF compresie recomandat
</option>
<option value="cloud-extreme">
iLovePDF compresie extremă
</option>
<option value="local">
💻 Local optimizare structurală (fără pierderi)
</option>
<option value="max">Puternică imagini 150 DPI, calitate medie (recomandat)</option>
<option value="balanced">Echilibrată imagini 200 DPI, calitate bună</option>
</select>
<p className="text-xs text-muted-foreground">
{mode === "extreme"
? "Reduce maxim dimensiunea. Imaginile pot pierde detalii fine."
: mode === "max"
? "Balanță bună între dimensiune și calitate. Recomandat pentru majoritatea fișierelor."
: "Pierdere minimă de calitate. Ideal pentru documente cu grafice detaliate."}
{mode === "cloud-recommended"
? "Compresie cloud cu calitate bună. Reduce semnificativ imaginile păstrând lizibilitatea."
: mode === "cloud-extreme"
? "Compresie maximă cloud. Imagini reduse agresiv — ideal pentru arhivare."
: "Optimizare locală cu qpdf (lossless). Zero pierdere de calitate, reducere 5-30%."}
</p>
</div>