diff --git a/package-lock.json b/package-lock.json index 1f2b14c..4b67250 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "@prisma/client": "^6.19.2", "axios": "^1.13.6", "axios-cookiejar-support": "^6.0.5", + "busboy": "^1.6.0", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "docx": "^9.6.0", @@ -35,6 +36,7 @@ }, "devDependencies": { "@tailwindcss/postcss": "^4", + "@types/busboy": "^1.5.4", "@types/jszip": "^3.4.0", "@types/node": "^20", "@types/nodemailer": "^7.0.11", @@ -3995,6 +3997,16 @@ "tslib": "^2.4.0" } }, + "node_modules/@types/busboy": { + "version": "1.5.4", + "resolved": "https://registry.npmjs.org/@types/busboy/-/busboy-1.5.4.tgz", + "integrity": "sha512-kG7WrUuAKK0NoyxfQHsVE6j1m01s6kMma64E+OZenQABMQyTJop1DumUWcLwAQ2JzpefU7PDYoRDKl8uZosFjw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/estree": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", @@ -5303,6 +5315,17 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/busboy": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", + "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", + "dependencies": { + "streamsearch": "^1.1.0" + }, + "engines": { + "node": ">=10.16.0" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -12144,6 +12167,14 @@ "stream-chain": "^2.2.5" } }, + "node_modules/streamsearch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", + "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/strict-event-emitter": { "version": "0.5.1", "resolved": 
"https://registry.npmjs.org/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz", diff --git a/package.json b/package.json index 77fe63c..54445c3 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "@prisma/client": "^6.19.2", "axios": "^1.13.6", "axios-cookiejar-support": "^6.0.5", + "busboy": "^1.6.0", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "docx": "^9.6.0", @@ -36,6 +37,7 @@ }, "devDependencies": { "@tailwindcss/postcss": "^4", + "@types/busboy": "^1.5.4", "@types/jszip": "^3.4.0", "@types/node": "^20", "@types/nodemailer": "^7.0.11", diff --git a/src/app/api/compress-pdf/cloud/route.ts b/src/app/api/compress-pdf/cloud/route.ts index 88d9f5f..ea63877 100644 --- a/src/app/api/compress-pdf/cloud/route.ts +++ b/src/app/api/compress-pdf/cloud/route.ts @@ -1,4 +1,7 @@ import { NextRequest, NextResponse } from "next/server"; +import { readFile, unlink } from "fs/promises"; +import { join } from "path"; +import { parseMultipartUpload } from "../parse-upload"; /** * iLovePDF API integration for PDF compression. @@ -6,97 +9,24 @@ import { NextRequest, NextResponse } from "next/server"; * Workflow: auth → start → upload → process → download * Docs: https://www.iloveapi.com/docs/api-reference * - * Env vars: ILOVEPDF_PUBLIC_KEY, ILOVEPDF_SECRET_KEY + * Env vars: ILOVEPDF_PUBLIC_KEY * Free tier: 250 files/month */ const ILOVEPDF_PUBLIC_KEY = process.env.ILOVEPDF_PUBLIC_KEY ?? ""; const API_BASE = "https://api.ilovepdf.com/v1"; -/** - * Extract the file binary from a raw multipart/form-data buffer. 
- */ -function extractFileFromMultipart( - raw: Buffer, - boundary: string, -): { buffer: Buffer; filename: string } | null { - const boundaryBuf = Buffer.from(`--${boundary}`); - const headerSep = Buffer.from("\r\n\r\n"); - const crlf = Buffer.from("\r\n"); - - let searchFrom = 0; - - while (searchFrom < raw.length) { - const partStart = raw.indexOf(boundaryBuf, searchFrom); - if (partStart === -1) break; - - const lineEnd = raw.indexOf(crlf, partStart); - if (lineEnd === -1) break; - - const headerEnd = raw.indexOf(headerSep, lineEnd); - if (headerEnd === -1) break; - - const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8"); - if (headers.includes("filename=")) { - const fileStart = headerEnd + 4; - - // Extract original filename - const fnMatch = headers.match(/filename="([^"]+)"/); - const filename = fnMatch?.[1] ?? "input.pdf"; - - const closingMarker = Buffer.from(`\r\n--${boundary}`); - const fileEnd = raw.lastIndexOf(closingMarker); - - const buffer = - fileEnd > fileStart - ? raw.subarray(fileStart, fileEnd) - : raw.subarray(fileStart); - - return { buffer, filename }; +async function cleanup(dir: string) { + try { + const { readdir, rmdir } = await import("fs/promises"); + const files = await readdir(dir); + for (const f of files) { + await unlink(join(dir, f)).catch(() => {}); } - - searchFrom = headerEnd + 4; + await rmdir(dir).catch(() => {}); + } catch { + // non-critical } - - return null; -} - -/** - * Extract a text field value from multipart body. 
- */ -function extractFieldFromMultipart( - raw: Buffer, - boundary: string, - fieldName: string, -): string | null { - const boundaryBuf = Buffer.from(`--${boundary}`); - const headerSep = Buffer.from("\r\n\r\n"); - const crlf = Buffer.from("\r\n"); - const namePattern = `name="${fieldName}"`; - - let searchFrom = 0; - while (searchFrom < raw.length) { - const partStart = raw.indexOf(boundaryBuf, searchFrom); - if (partStart === -1) break; - const lineEnd = raw.indexOf(crlf, partStart); - if (lineEnd === -1) break; - const headerEnd = raw.indexOf(headerSep, lineEnd); - if (headerEnd === -1) break; - - const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8"); - if (headers.includes(namePattern) && !headers.includes("filename=")) { - const valueStart = headerEnd + 4; - const nextBoundary = raw.indexOf( - Buffer.from(`\r\n--${boundary}`), - valueStart, - ); - if (nextBoundary > valueStart) { - return raw.subarray(valueStart, nextBoundary).toString("utf8").trim(); - } - } - searchFrom = headerEnd + 4; - } - return null; } export async function POST(req: NextRequest) { @@ -110,39 +40,23 @@ export async function POST(req: NextRequest) { ); } + let tmpDir = ""; try { - // Parse multipart body - if (!req.body) { + // Stream upload to disk — works for any file size + const upload = await parseMultipartUpload(req); + tmpDir = upload.tmpDir; + + const originalSize = upload.size; + + if (originalSize < 100) { return NextResponse.json( - { error: "Lipsește fișierul PDF." }, + { error: "Fișierul PDF este gol sau prea mic." }, { status: 400 }, ); } - const rawBuf = Buffer.from(await req.arrayBuffer()); - const contentType = req.headers.get("content-type") || ""; - const boundaryMatch = contentType.match( - /boundary=(?:"([^"]+)"|([^\s;]+))/, - ); - const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? ""; - - if (!boundary) { - return NextResponse.json( - { error: "Invalid request." 
}, - { status: 400 }, - ); - } - - const fileData = extractFileFromMultipart(rawBuf, boundary); - if (!fileData || fileData.buffer.length < 100) { - return NextResponse.json( - { error: "Fișierul PDF nu a putut fi extras." }, - { status: 400 }, - ); - } - - // Extract compression level (extreme / recommended / low) - const levelParam = extractFieldFromMultipart(rawBuf, boundary, "level"); + // Compression level from form field + const levelParam = upload.fields["level"] ?? ""; const compressionLevel = levelParam === "extreme" ? "extreme" @@ -150,8 +64,6 @@ export async function POST(req: NextRequest) { ? "low" : "recommended"; - const originalSize = fileData.buffer.length; - // Step 1: Authenticate const authRes = await fetch(`${API_BASE}/auth`, { method: "POST", @@ -188,20 +100,21 @@ export async function POST(req: NextRequest) { task: string; }; - // Step 3: Upload file + // Step 3: Upload file (read from disk to avoid double-buffering) + const fileBuffer = await readFile(upload.filePath); const uploadForm = new FormData(); uploadForm.append("task", task); uploadForm.append( "file", - new Blob([new Uint8Array(fileData.buffer)], { type: "application/pdf" }), - fileData.filename, + new Blob([new Uint8Array(fileBuffer)], { type: "application/pdf" }), + upload.filename, ); const uploadRes = await fetch(`https://${server}/v1/upload`, { method: "POST", headers: { Authorization: `Bearer ${token}` }, body: uploadForm, - signal: AbortSignal.timeout(300_000), // 5 min for large files + signal: AbortSignal.timeout(600_000), // 10 min for very large files }); if (!uploadRes.ok) { @@ -230,11 +143,11 @@ export async function POST(req: NextRequest) { files: [ { server_filename, - filename: fileData.filename, + filename: upload.filename, }, ], }), - signal: AbortSignal.timeout(300_000), + signal: AbortSignal.timeout(600_000), }); if (!processRes.ok) { @@ -250,7 +163,7 @@ export async function POST(req: NextRequest) { `https://${server}/v1/download/${task}`, { headers: { 
Authorization: `Bearer ${token}` }, - signal: AbortSignal.timeout(300_000), + signal: AbortSignal.timeout(600_000), }, ); @@ -268,7 +181,7 @@ export async function POST(req: NextRequest) { const resultBuffer = Buffer.from(await resultBlob.arrayBuffer()); const compressedSize = resultBuffer.length; - // Clean up task on iLovePDF + // Clean up task on iLovePDF (fire and forget) fetch(`https://${server}/v1/task/${task}`, { method: "DELETE", headers: { Authorization: `Bearer ${token}` }, @@ -278,7 +191,7 @@ export async function POST(req: NextRequest) { status: 200, headers: { "Content-Type": "application/pdf", - "Content-Disposition": `attachment; filename="${fileData.filename.replace(/\.pdf$/i, "-comprimat.pdf")}"`, + "Content-Disposition": `attachment; filename="${upload.filename.replace(/\.pdf$/i, "-comprimat.pdf")}"`, "X-Original-Size": String(originalSize), "X-Compressed-Size": String(compressedSize), }, @@ -289,5 +202,7 @@ export async function POST(req: NextRequest) { { error: `Eroare iLovePDF: ${message}` }, { status: 500 }, ); + } finally { + if (tmpDir) await cleanup(tmpDir); } } diff --git a/src/app/api/compress-pdf/extreme/route.ts b/src/app/api/compress-pdf/extreme/route.ts index 3f9f842..fec3000 100644 --- a/src/app/api/compress-pdf/extreme/route.ts +++ b/src/app/api/compress-pdf/extreme/route.ts @@ -1,10 +1,9 @@ import { NextRequest, NextResponse } from "next/server"; -import { writeFile, readFile, unlink, mkdir, stat } from "fs/promises"; +import { readFile, unlink, stat } from "fs/promises"; import { execFile } from "child_process"; import { promisify } from "util"; -import { randomUUID } from "crypto"; import { join } from "path"; -import { tmpdir } from "os"; +import { parseMultipartUpload } from "../parse-upload"; const execFileAsync = promisify(execFile); @@ -24,58 +23,13 @@ function qpdfArgs(input: string, output: string): string[] { ]; } -/** - * Extract the file binary from a raw multipart/form-data buffer. 
- * Finds the part whose Content-Disposition contains `filename=`, - * then returns the bytes between the header-end and the closing boundary. - */ -function extractFileFromMultipart( - raw: Buffer, - boundary: string, -): Buffer | null { - const boundaryBuf = Buffer.from(`--${boundary}`); - const headerSep = Buffer.from("\r\n\r\n"); - const crlf = Buffer.from("\r\n"); - - let searchFrom = 0; - - while (searchFrom < raw.length) { - const partStart = raw.indexOf(boundaryBuf, searchFrom); - if (partStart === -1) break; - - const lineEnd = raw.indexOf(crlf, partStart); - if (lineEnd === -1) break; - - const headerEnd = raw.indexOf(headerSep, lineEnd); - if (headerEnd === -1) break; - - const headers = raw.subarray(lineEnd + 2, headerEnd).toString("utf8"); - if (headers.includes("filename=")) { - const fileStart = headerEnd + 4; - - const closingMarker = Buffer.from(`\r\n--${boundary}`); - const fileEnd = raw.lastIndexOf(closingMarker); - - if (fileEnd > fileStart) { - return raw.subarray(fileStart, fileEnd); - } - return raw.subarray(fileStart); - } - - searchFrom = headerEnd + 4; - } - - return null; -} - async function cleanup(dir: string) { try { - const { readdir } = await import("fs/promises"); + const { readdir, rmdir } = await import("fs/promises"); const files = await readdir(dir); for (const f of files) { await unlink(join(dir, f)).catch(() => {}); } - const { rmdir } = await import("fs/promises"); await rmdir(dir).catch(() => {}); } catch { // cleanup failure is non-critical @@ -83,51 +37,27 @@ async function cleanup(dir: string) { } export async function POST(req: NextRequest) { - const tmpDir = join(tmpdir(), `pdf-qpdf-${randomUUID()}`); + let tmpDir = ""; try { - await mkdir(tmpDir, { recursive: true }); + // Stream upload to disk — works for any file size + const upload = await parseMultipartUpload(req); + tmpDir = upload.tmpDir; - const inputPath = join(tmpDir, "input.pdf"); - const outputPath = join(tmpDir, "output.pdf"); + const inputPath = 
upload.filePath; + const outputPath = join(upload.tmpDir, "output.pdf"); + const originalSize = upload.size; - if (!req.body) { + if (originalSize < 100) { return NextResponse.json( - { error: "Lipsește fișierul PDF." }, + { error: "Fișierul PDF este gol sau prea mic." }, { status: 400 }, ); } - const rawBuf = Buffer.from(await req.arrayBuffer()); - - const contentType = req.headers.get("content-type") || ""; - const boundaryMatch = contentType.match( - /boundary=(?:"([^"]+)"|([^\s;]+))/, - ); - const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? ""; - - if (!boundary) { - return NextResponse.json( - { error: "Invalid request — missing multipart boundary." }, - { status: 400 }, - ); - } - - const pdfBuffer = extractFileFromMultipart(rawBuf, boundary); - - if (!pdfBuffer || pdfBuffer.length < 100) { - return NextResponse.json( - { error: "Fișierul PDF este gol sau nu a putut fi extras." }, - { status: 400 }, - ); - } - - await writeFile(inputPath, pdfBuffer); - const originalSize = pdfBuffer.length; - // qpdf: lossless structural optimization — fonts and images untouched try { await execFileAsync("qpdf", qpdfArgs(inputPath, outputPath), { - timeout: 120_000, + timeout: 300_000, // 5 min for very large files maxBuffer: 10 * 1024 * 1024, }); } catch (qpdfErr) { @@ -139,7 +69,7 @@ export async function POST(req: NextRequest) { { status: 501 }, ); } - // qpdf returns exit code 3 for warnings — output is still valid + // qpdf exit code 3 = warnings, output is still valid const exitCode = qpdfErr && typeof qpdfErr === "object" && "code" in qpdfErr ? (qpdfErr as { code: number }).code @@ -153,10 +83,8 @@ export async function POST(req: NextRequest) { } // Verify output exists - let outputSize = 0; try { - const s = await stat(outputPath); - outputSize = s.size; + await stat(outputPath); } catch { return NextResponse.json( { error: "qpdf nu a produs fișier output." 
}, @@ -169,7 +97,8 @@ export async function POST(req: NextRequest) { // If compression made it bigger, return original if (compressedSize >= originalSize) { - return new NextResponse(new Uint8Array(pdfBuffer), { + const originalBuffer = await readFile(inputPath); + return new NextResponse(new Uint8Array(originalBuffer), { status: 200, headers: { "Content-Type": "application/pdf", @@ -196,6 +125,6 @@ export async function POST(req: NextRequest) { { status: 500 }, ); } finally { - await cleanup(tmpDir); + if (tmpDir) await cleanup(tmpDir); } } diff --git a/src/app/api/compress-pdf/parse-upload.ts b/src/app/api/compress-pdf/parse-upload.ts new file mode 100644 index 0000000..5e04383 --- /dev/null +++ b/src/app/api/compress-pdf/parse-upload.ts @@ -0,0 +1,124 @@ +/** + * Streaming multipart parser for large PDF uploads. + * + * Uses `busboy` to stream the file part directly to disk — never buffers the + * entire request body in memory. Works reliably for files of any size. + */ + +import { NextRequest } from "next/server"; +import { createWriteStream } from "fs"; +import { mkdir } from "fs/promises"; +import { randomUUID } from "crypto"; +import { join } from "path"; +import { tmpdir } from "os"; +import { Readable } from "stream"; +import Busboy from "busboy"; + +export interface ParsedUpload { + /** Absolute path to the extracted PDF on disk */ + filePath: string; + /** Original filename from the upload */ + filename: string; + /** File size in bytes */ + size: number; + /** Temp directory (caller should clean up) */ + tmpDir: string; + /** Any extra form fields (e.g. "level") */ + fields: Record; +} + +/** + * Parse a multipart/form-data request, streaming the file to a temp directory. + * Returns the path to the extracted file on disk + metadata. 
/**
 * Parses a multipart/form-data request, streaming the uploaded file into a
 * freshly created temp directory instead of buffering the body in memory.
 *
 * The promise settles only after BOTH the file has been flushed to disk and
 * busboy has finished parsing the whole body, so `fields` also contains form
 * fields that were sent after the file part.
 *
 * The file is always stored as `input.pdf` inside the temp directory. The
 * client-supplied name is returned in `filename` but is never used as a path,
 * so it cannot collide with other files the caller writes next to it.
 *
 * @param req - Incoming request; must carry a `content-type` header and a body.
 * @returns Path on disk, original filename, size in bytes, temp directory and
 *   the text form fields. On success the caller owns `tmpDir` and must remove it.
 * @throws Rejects when the header or body is missing, no file part is present,
 *   the file exceeds the size limit, or a stream/filesystem error occurs. The
 *   temp directory is removed before the rejection is delivered.
 */
export function parseMultipartUpload(req: NextRequest): Promise<ParsedUpload> {
  return new Promise<ParsedUpload>((resolve, reject) => {
    const contentType = req.headers.get("content-type");
    if (!contentType || !req.body) {
      reject(new Error("Lipsește fișierul PDF."));
      return;
    }

    const maxFileBytes = 500 * 1024 * 1024; // 500 MB
    const tmpDir = join(tmpdir(), `pdf-upload-${randomUUID()}`);
    const fields: Record<string, string> = {};

    // Busboy throws synchronously on an unsupported content type; inside the
    // executor that becomes a rejection before anything touches the disk.
    const bb = Busboy({
      headers: { "content-type": contentType },
      limits: { fileSize: maxFileBytes, files: 1 },
    });

    // Create the directory up front so every later failure path can wait for
    // this to finish before deleting it (no mkdir/rm race).
    const dirReady = mkdir(tmpDir, { recursive: true });

    let settled = false;
    let fileSeen = false;
    let parsingDone = false;
    let stored: { filePath: string; filename: string; size: number } | null =
      null;
    let source: Readable | null = null;
    let sink: ReturnType<typeof createWriteStream> | null = null;

    /** Reject exactly once, after tearing down streams and the temp dir. */
    const fail = (cause: unknown): void => {
      if (settled) return;
      settled = true;
      const error = cause instanceof Error ? cause : new Error(String(cause));

      source?.unpipe(bb);
      source?.destroy();
      sink?.destroy();

      // The caller only learns `tmpDir` on success, so clean up here.
      void dirReady
        .catch(() => undefined)
        .then(() => import("fs/promises"))
        .then(({ rm }) => rm(tmpDir, { recursive: true, force: true }))
        .catch(() => undefined)
        .then(() => reject(error));
    };

    /** Resolve once the file is on disk AND the whole body has been parsed. */
    const finishIfReady = (): void => {
      if (settled || !parsingDone || stored === null) return;
      settled = true;
      resolve({ ...stored, tmpDir, fields });
    };

    dirReady.catch(fail);

    bb.on("field", (name: string, val: string) => {
      fields[name] = val;
    });

    bb.on(
      "file",
      (
        _name: string,
        file: NodeJS.ReadableStream,
        info: { filename: string; encoding: string; mimeType: string },
      ) => {
        fileSeen = true;
        const filename = info.filename || "input.pdf";
        // Fixed on-disk name: never derive a path from client input.
        const filePath = join(tmpDir, "input.pdf");

        // busboy truncates the stream at `fileSize` and emits "limit";
        // a truncated PDF must not be reported as a successful upload.
        file.on("limit", () => {
          fail(new Error("Fișierul PDF depășește limita de 500 MB."));
        });
        file.on("error", fail);

        dirReady
          .then(() => {
            if (settled) {
              // Already failed: drain the part so busboy is not left stalled.
              file.resume();
              return;
            }
            const out = createWriteStream(filePath);
            sink = out;
            out.on("error", fail);
            out.on("finish", () => {
              stored = { filePath, filename, size: out.bytesWritten };
              finishIfReady();
            });
            file.pipe(out);
          })
          .catch(fail);
      },
    );

    bb.on("error", fail);

    bb.on("close", () => {
      // "No file" is decided by whether a file part was seen, not by whether
      // the disk write happened to finish before parsing did.
      if (!fileSeen) {
        fail(new Error("Lipsește fișierul PDF din upload."));
        return;
      }
      parsingDone = true;
      finishIfReady();
    });

    // Bridge the Web ReadableStream into busboy. `pipe` does not forward
    // source errors, so observe them explicitly (e.g. client disconnect).
    source = Readable.fromWeb(
      req.body as import("stream/web").ReadableStream<Uint8Array>,
    );
    source.on("error", fail);
    source.pipe(bb);
  });
}