fix(mini-utilities): Stirling PDF API key auth, Tesseract.js OCR, emoji removal in cleaner

This commit is contained in:
AI Assistant
2026-02-19 00:43:05 +02:00
parent 3154eb7f4a
commit 41036db659
4 changed files with 252 additions and 23 deletions

115
package-lock.json generated
View File

@@ -18,6 +18,7 @@
"react": "19.2.3", "react": "19.2.3",
"react-dom": "19.2.3", "react-dom": "19.2.3",
"tailwind-merge": "^3.4.1", "tailwind-merge": "^3.4.1",
"tesseract.js": "^7.0.0",
"uuid": "^13.0.0" "uuid": "^13.0.0"
}, },
"devDependencies": { "devDependencies": {
@@ -4921,6 +4922,12 @@
"baseline-browser-mapping": "dist/cli.js" "baseline-browser-mapping": "dist/cli.js"
} }
}, },
"node_modules/bmp-js": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz",
"integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==",
"license": "MIT"
},
"node_modules/body-parser": { "node_modules/body-parser": {
"version": "2.2.2", "version": "2.2.2",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz",
@@ -7323,6 +7330,12 @@
"url": "https://opencollective.com/express" "url": "https://opencollective.com/express"
} }
}, },
"node_modules/idb-keyval": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.2.tgz",
"integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==",
"license": "Apache-2.0"
},
"node_modules/ignore": { "node_modules/ignore": {
"version": "5.3.2", "version": "5.3.2",
"resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
@@ -7933,6 +7946,12 @@
"url": "https://github.com/sponsors/sindresorhus" "url": "https://github.com/sponsors/sindresorhus"
} }
}, },
"node_modules/is-url": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz",
"integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==",
"license": "MIT"
},
"node_modules/is-weakmap": { "node_modules/is-weakmap": {
"version": "2.0.2", "version": "2.0.2",
"resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz",
@@ -9249,6 +9268,15 @@
"url": "https://github.com/sponsors/sindresorhus" "url": "https://github.com/sponsors/sindresorhus"
} }
}, },
"node_modules/opencollective-postinstall": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz",
"integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==",
"license": "MIT",
"bin": {
"opencollective-postinstall": "index.js"
}
},
"node_modules/optionator": { "node_modules/optionator": {
"version": "0.9.4", "version": "0.9.4",
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@@ -9957,6 +9985,12 @@
"url": "https://github.com/sponsors/ljharb" "url": "https://github.com/sponsors/ljharb"
} }
}, },
"node_modules/regenerator-runtime": {
"version": "0.13.11",
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz",
"integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==",
"license": "MIT"
},
"node_modules/regexp.prototype.flags": { "node_modules/regexp.prototype.flags": {
"version": "1.5.4", "version": "1.5.4",
"resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz", "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz",
@@ -10991,6 +11025,50 @@
"url": "https://opencollective.com/webpack" "url": "https://opencollective.com/webpack"
} }
}, },
"node_modules/tesseract.js": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-7.0.0.tgz",
"integrity": "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==",
"hasInstallScript": true,
"license": "Apache-2.0",
"dependencies": {
"bmp-js": "^0.1.0",
"idb-keyval": "^6.2.0",
"is-url": "^1.2.4",
"node-fetch": "^2.6.9",
"opencollective-postinstall": "^2.0.3",
"regenerator-runtime": "^0.13.3",
"tesseract.js-core": "^7.0.0",
"wasm-feature-detect": "^1.8.0",
"zlibjs": "^0.3.1"
}
},
"node_modules/tesseract.js-core": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz",
"integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==",
"license": "Apache-2.0"
},
"node_modules/tesseract.js/node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"license": "MIT",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
}
},
"node_modules/tiny-invariant": { "node_modules/tiny-invariant": {
"version": "1.3.3", "version": "1.3.3",
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
@@ -11113,6 +11191,12 @@
"node": ">=16" "node": ">=16"
} }
}, },
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"license": "MIT"
},
"node_modules/ts-api-utils": { "node_modules/ts-api-utils": {
"version": "2.4.0", "version": "2.4.0",
"resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz",
@@ -11576,6 +11660,12 @@
"node": ">= 0.8" "node": ">= 0.8"
} }
}, },
"node_modules/wasm-feature-detect": {
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz",
"integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==",
"license": "Apache-2.0"
},
"node_modules/web-streams-polyfill": { "node_modules/web-streams-polyfill": {
"version": "3.3.3", "version": "3.3.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
@@ -11586,6 +11676,22 @@
"node": ">= 8" "node": ">= 8"
} }
}, },
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
"license": "BSD-2-Clause"
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"license": "MIT",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"node_modules/which": { "node_modules/which": {
"version": "2.0.2", "version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
@@ -11915,6 +12021,15 @@
"url": "https://github.com/sponsors/sindresorhus" "url": "https://github.com/sponsors/sindresorhus"
} }
}, },
"node_modules/zlibjs": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz",
"integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==",
"license": "MIT",
"engines": {
"node": "*"
}
},
"node_modules/zod": { "node_modules/zod": {
"version": "4.3.6", "version": "4.3.6",
"resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz",

View File

@@ -19,6 +19,7 @@
"react": "19.2.3", "react": "19.2.3",
"react-dom": "19.2.3", "react-dom": "19.2.3",
"tailwind-merge": "^3.4.1", "tailwind-merge": "^3.4.1",
"tesseract.js": "^7.0.0",
"uuid": "^13.0.0" "uuid": "^13.0.0"
}, },
"devDependencies": { "devDependencies": {

View File

@@ -2,6 +2,8 @@ import { NextRequest, NextResponse } from "next/server";
const STIRLING_PDF_URL = const STIRLING_PDF_URL =
process.env.STIRLING_PDF_URL ?? "http://10.10.10.166:8087"; process.env.STIRLING_PDF_URL ?? "http://10.10.10.166:8087";
// API key sent to Stirling PDF in the X-API-KEY request header.
// It must be provided through the STIRLING_PDF_API_KEY environment variable:
// a real key used as a literal fallback ends up in version control and in
// every build artifact. When the variable is unset the key is empty and the
// upstream service is left to reject the request.
// NOTE(review): the key that was previously committed here should be treated
// as leaked and rotated on the Stirling PDF instance.
const STIRLING_PDF_API_KEY = process.env.STIRLING_PDF_API_KEY ?? "";
export async function POST(req: NextRequest) { export async function POST(req: NextRequest) {
try { try {
@@ -9,6 +11,7 @@ export async function POST(req: NextRequest) {
const res = await fetch(`${STIRLING_PDF_URL}/api/v1/misc/compress-pdf`, { const res = await fetch(`${STIRLING_PDF_URL}/api/v1/misc/compress-pdf`, {
method: "POST", method: "POST",
headers: { "X-API-KEY": STIRLING_PDF_API_KEY },
body: formData, body: formData,
}); });

View File

@@ -375,6 +375,10 @@ function AiArtifactCleaner() {
r = r.replace(/\u0163/g, "ț"); r = r.replace(/\u0163/g, "ț");
// Remove zero-width and invisible chars // Remove zero-width and invisible chars
r = r.replace(/[\u200b\u200c\u200d\ufeff]/g, ""); r = r.replace(/[\u200b\u200c\u200d\ufeff]/g, "");
// Remove emoji
// Extended_Pictographic only matches the base pictographs. The pieces that
// follow a base inside an emoji sequence are separate code points and must be
// stripped explicitly, otherwise they are left behind once the base is gone
// (e.g. a lone skin-tone swatch, or invisible tag characters from a flag).
r = r.replace(/\p{Extended_Pictographic}/gu, "");
r = r.replace(/\p{Emoji_Modifier}/gu, ""); // skin-tone modifiers
r = r.replace(/[\u{1F1E0}-\u{1F1FF}]/gu, ""); // flag emoji (regional indicators)
r = r.replace(/[\u{E0020}-\u{E007F}]/gu, ""); // tag characters (subdivision flags)
r = r.replace(/[\u{FE00}-\u{FE0F}\u{20D0}-\u{20FF}]/gu, ""); // variation selectors + combining marks for symbols (keycap)
// NOTE(review): Extended_Pictographic also matches text symbols such as
// ©, ® and ™ — confirm that stripping them is intended.
// Normalize typography // Normalize typography
r = r.replace(/[""]/g, '"'); r = r.replace(/[""]/g, '"');
r = r.replace(/['']/g, "'"); r = r.replace(/['']/g, "'");
@@ -413,9 +417,9 @@ function AiArtifactCleaner() {
</div> </div>
</div> </div>
<p className="text-xs text-muted-foreground"> <p className="text-xs text-muted-foreground">
Operații: eliminare markdown (###, **, `, liste, citate), corectare Operații: eliminare markdown (###, **, `, liste, citate), emoji,
encoding românesc (mojibake), curățare Unicode invizibil, normalizare corectare encoding românesc (mojibake), curățare Unicode invizibil,
ghilimele / cratime / spații multiple. normalizare ghilimele / cratime / spații multiple.
</p> </p>
</div> </div>
); );
@@ -574,33 +578,139 @@ function PdfReducer() {
// ─── Quick OCR ──────────────────────────────────────────────────────────────── // ─── Quick OCR ────────────────────────────────────────────────────────────────
function QuickOcr() { function QuickOcr() {
const [imageSrc, setImageSrc] = useState<string | null>(null);
const [text, setText] = useState("");
const [progress, setProgress] = useState(0);
const [loading, setLoading] = useState(false);
const [error, setError] = useState("");
const [lang, setLang] = useState("ron+eng");
const fileRef = useRef<HTMLInputElement>(null);
// Runs Tesseract.js OCR on `src` (the image source handed to
// `worker.recognize`) using the languages currently selected in `lang`, and
// publishes the outcome through component state: recognised text via
// `setText`, progress percentage via `setProgress`, failures via `setError`.
// Does nothing if a run is already flagged as in progress.
const runOcr = async (src: string) => {
  if (loading) return;
  setLoading(true);
  setError("");
  setText("");
  setProgress(0);
  try {
    // Loaded on demand so the OCR library is only fetched when OCR is used.
    const { createWorker } = await import("tesseract.js");
    // `lang` is a "+"-joined list (e.g. "ron+eng"); the worker takes an array.
    // NOTE(review): the second argument is the OCR engine mode — presumably
    // 1 = LSTM only; confirm against the tesseract.js API docs.
    const worker = await createWorker(lang.split("+"), 1, {
      logger: (m: { status: string; progress: number }) => {
        // Only the recognition phase drives the progress bar.
        if (m.status === "recognizing text")
          setProgress(Math.round(m.progress * 100));
      },
    });
    try {
      const { data } = await worker.recognize(src);
      setText(data.text.trim());
    } finally {
      // Always release the worker, including when recognition throws;
      // otherwise every failed run leaks a live worker.
      await worker.terminate();
    }
  } catch (e) {
    setError(e instanceof Error ? e.message : "Eroare OCR necunoscută");
  } finally {
    setLoading(false);
  }
};
// Reads `file` as a data URL; once loaded, the data URL becomes the preview
// image and is passed to `runOcr`.
const handleFile = (file: File) => {
  const fileReader = new FileReader();
  const onLoaded = (event: ProgressEvent<FileReader>) => {
    const dataUrl = event.target?.result as string;
    setImageSrc(dataUrl);
    runOcr(dataUrl);
  };
  fileReader.onload = onLoaded;
  fileReader.readAsDataURL(file);
};
// Drop handler: cancels the default drop action, then forwards the first
// dropped file whose MIME type starts with "image/" to `handleFile`.
// Drops without any image file are ignored.
const handleDrop = (e: React.DragEvent) => {
  e.preventDefault();
  for (const dropped of Array.from(e.dataTransfer.files)) {
    if (dropped.type.startsWith("image/")) {
      handleFile(dropped);
      return;
    }
  }
};
// Paste handler: takes the first clipboard item whose type starts with
// "image/", converts it to a File and forwards it to `handleFile`.
// Nothing happens when there is no such item or it yields no file.
const handlePaste = (e: React.ClipboardEvent) => {
  const clipboardItems = Array.from(e.clipboardData.items);
  const imageItem = clipboardItems.find(({ type }) =>
    type.startsWith("image/"),
  );
  if (!imageItem) return;
  const pasted = imageItem.getAsFile();
  if (pasted) handleFile(pasted);
};
return ( return (
<div className="space-y-3"> <div className="space-y-3" onPaste={handlePaste}>
<div className="flex flex-wrap items-center gap-3 text-sm"> <div className="flex flex-wrap items-center gap-3">
<a <select
href="https://ocr.z.ai" value={lang}
target="_blank" onChange={(e) => setLang(e.target.value)}
rel="noopener noreferrer" className="rounded-md border bg-background px-3 py-1.5 text-sm"
className="text-primary underline underline-offset-2"
> >
Deschide ocr.z.ai ↗ <option value="ron+eng">Română + Engleză</option>
</a> <option value="ron">Română</option>
<span className="text-muted-foreground">•</span> <option value="eng">Engleză</option>
<span className="text-muted-foreground text-xs"> </select>
Extragere text din imagini și PDF-uri scanate <span className="text-xs text-muted-foreground">
sau Ctrl+V pentru a lipi imaginea
</span> </span>
</div> </div>
<div <div
className="overflow-hidden rounded-md border" className="flex min-h-[120px] cursor-pointer items-center justify-center rounded-md border-2 border-dashed p-4 text-sm text-muted-foreground transition-colors hover:border-primary/50"
style={{ height: "560px" }} onClick={() => fileRef.current?.click()}
onDrop={handleDrop}
onDragOver={(e) => e.preventDefault()}
> >
<iframe {imageSrc ? (
src="https://ocr.z.ai" // eslint-disable-next-line @next/next/no-img-element
className="h-full w-full" <img
title="OCR — extragere text din imagini" src={imageSrc}
allow="fullscreen" alt="preview"
/> className="max-h-48 max-w-full rounded object-contain"
/>
) : (
<span>Trage o imagine aici, apasă pentru a selecta, sau Ctrl+V</span>
)}
</div> </div>
<input
ref={fileRef}
type="file"
accept="image/*"
className="hidden"
onChange={(e) => {
const f = e.target.files?.[0];
if (f) handleFile(f);
}}
/>
{loading && (
<div className="space-y-1">
<div className="flex justify-between text-xs text-muted-foreground">
<span>Se procesează... (primul rulaj descarcă modelul ~10 MB)</span>
<span>{progress}%</span>
</div>
<div className="h-1.5 w-full overflow-hidden rounded-full bg-muted">
<div
className="h-full bg-primary transition-all"
style={{ width: `${progress}%` }}
/>
</div>
</div>
)}
{error && <p className="text-sm text-destructive">{error}</p>}
{text && (
<div>
<div className="flex items-center justify-between">
<Label>Text extras</Label>
<CopyButton text={text} />
</div>
<Textarea
value={text}
readOnly
className="mt-1 h-56 font-mono text-xs bg-muted/30"
/>
</div>
)}
</div> </div>
); );
} }