fix: extreme PDF compression producing empty output on large files
The multipart body parser treated the first \r\n\r\n in the request body as the start of the file content, which could miss the actual file part. It now iterates through the parts to find the one whose headers contain filename=, and uses lastIndexOf to locate the closing boundary so that boundary-like bytes inside the PDF binary data cannot cause a false match.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -120,18 +120,16 @@ export async function POST(req: NextRequest) {
|
|||||||
const nodeStream = Readable.fromWeb(req.body as import("stream/web").ReadableStream);
|
const nodeStream = Readable.fromWeb(req.body as import("stream/web").ReadableStream);
|
||||||
await pipeline(nodeStream, createWriteStream(rawPath));
|
await pipeline(nodeStream, createWriteStream(rawPath));
|
||||||
|
|
||||||
// Extract the PDF from multipart: find the double CRLF after headers,
|
// Extract the PDF binary from the multipart body.
|
||||||
// then read until the boundary marker before the end
|
// Multipart format:
|
||||||
|
// --boundary\r\n
|
||||||
|
// Content-Disposition: form-data; name="fileInput"; filename="x.pdf"\r\n
|
||||||
|
// Content-Type: application/pdf\r\n
|
||||||
|
// \r\n
|
||||||
|
// <FILE BYTES>
|
||||||
|
// \r\n--boundary--\r\n
|
||||||
const rawBuf = await readFile(rawPath);
|
const rawBuf = await readFile(rawPath);
|
||||||
const headerEnd = rawBuf.indexOf(Buffer.from("\r\n\r\n"));
|
|
||||||
if (headerEnd === -1) {
|
|
||||||
return NextResponse.json(
|
|
||||||
{ error: "Lipsește fișierul PDF." },
|
|
||||||
{ status: 400 },
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract boundary from Content-Type header
|
|
||||||
const contentType = req.headers.get("content-type") || "";
|
const contentType = req.headers.get("content-type") || "";
|
||||||
const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^\s;]+))/);
|
const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^\s;]+))/);
|
||||||
const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
|
const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2] ?? "";
|
||||||
@@ -143,11 +141,50 @@ export async function POST(req: NextRequest) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// File content starts after first double CRLF, ends before closing boundary
|
const boundaryBuf = Buffer.from(`--${boundary}`);
|
||||||
const closingBoundary = Buffer.from(`\r\n--${boundary}`);
|
|
||||||
const fileStart = headerEnd + 4;
|
// Find the part that contains a filename (the file upload part).
|
||||||
const fileEnd = rawBuf.indexOf(closingBoundary, fileStart);
|
// There may be multiple parts — we need the one with "filename=".
|
||||||
const pdfData = fileEnd !== -1 ? rawBuf.subarray(fileStart, fileEnd) : rawBuf.subarray(fileStart);
|
let fileStart = -1;
|
||||||
|
let searchFrom = 0;
|
||||||
|
|
||||||
|
while (searchFrom < rawBuf.length) {
|
||||||
|
const partStart = rawBuf.indexOf(boundaryBuf, searchFrom);
|
||||||
|
if (partStart === -1) break;
|
||||||
|
|
||||||
|
// Skip past boundary line to get to headers
|
||||||
|
const headersStart = rawBuf.indexOf(Buffer.from("\r\n"), partStart);
|
||||||
|
if (headersStart === -1) break;
|
||||||
|
|
||||||
|
const headerEnd = rawBuf.indexOf(Buffer.from("\r\n\r\n"), headersStart);
|
||||||
|
if (headerEnd === -1) break;
|
||||||
|
|
||||||
|
// Check if this part's headers contain a filename
|
||||||
|
const headers = rawBuf.subarray(headersStart, headerEnd).toString("utf8");
|
||||||
|
if (headers.includes("filename=")) {
|
||||||
|
fileStart = headerEnd + 4; // skip \r\n\r\n
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move past this part's headers to search for next boundary
|
||||||
|
searchFrom = headerEnd + 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fileStart === -1) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: "Lipsește fișierul PDF." },
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the closing boundary after the file content.
|
||||||
|
// Search from the END of the buffer backwards, since the PDF binary
|
||||||
|
// could theoretically contain the boundary string by coincidence.
|
||||||
|
const closingMarker = Buffer.from(`\r\n--${boundary}`);
|
||||||
|
const fileEnd = rawBuf.lastIndexOf(closingMarker);
|
||||||
|
const pdfData = (fileEnd > fileStart)
|
||||||
|
? rawBuf.subarray(fileStart, fileEnd)
|
||||||
|
: rawBuf.subarray(fileStart);
|
||||||
|
|
||||||
await writeFile(inputPath, pdfData);
|
await writeFile(inputPath, pdfData);
|
||||||
const originalSize = pdfData.length;
|
const originalSize = pdfData.length;
|
||||||
|
|||||||
Reference in New Issue
Block a user