feat(epay): three layers of download/poll resilience
After 327649 hit a transient ANCPI 500 on download (succeeded immediately on manual retry), make the pipeline self-heal instead of marking the row failed:

1. downloadDocument retries transient failures (5xx, network/timeout, empty body, non-PDF error page) up to 4 attempts with linear backoff (3/6/9s); a 4xx is permanent and stops immediately. The %PDF guard stays — a bad body is now retried rather than thrown on the first try.

2. pollUntilComplete tolerates a transient error on a single poll: it logs and continues to the next cycle instead of throwing out of the whole batch (one ANCPI blip during polling no longer fails a paid order).

3. finalizeOrder runs a final retry sweep: any row still failed after the parallel pass is re-attempted once more after a short pause (covers a longer ANCPI blip or a MinIO hiccup). No new charge — the order is already paid.

Same downloadDocument + pollUntilComplete hardening ported to eterra-live.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -51,6 +51,9 @@ const POLL_MAX_ATTEMPTS = 40;
|
|||||||
// ShowOrderDetails page size — large enough to fetch any realistic batch in
|
// ShowOrderDetails page size — large enough to fetch any realistic batch in
|
||||||
// one request (see getOrderStatus / QW4).
|
// one request (see getOrderStatus / QW4).
|
||||||
const ORDER_PAGE_SIZE = 50;
|
const ORDER_PAGE_SIZE = 50;
|
||||||
|
// Document download retry (transient ANCPI 5xx / timeout / error-page).
|
||||||
|
const DOWNLOAD_MAX_ATTEMPTS = 4;
|
||||||
|
const DOWNLOAD_RETRY_DELAY_MS = 3_000; // linear backoff: 3s, 6s, 9s
|
||||||
|
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
/* Session cache */
|
/* Session cache */
|
||||||
@@ -779,11 +782,21 @@ export class EpayClient {
|
|||||||
onProgress?: (attempt: number, status: string) => void,
|
onProgress?: (attempt: number, status: string) => void,
|
||||||
): Promise<EpayOrderStatus> {
|
): Promise<EpayOrderStatus> {
|
||||||
for (let attempt = 1; attempt <= POLL_MAX_ATTEMPTS; attempt++) {
|
for (let attempt = 1; attempt <= POLL_MAX_ATTEMPTS; attempt++) {
|
||||||
|
try {
|
||||||
const status = await this.getOrderStatus(orderId);
|
const status = await this.getOrderStatus(orderId);
|
||||||
if (onProgress) onProgress(attempt, status.status);
|
if (onProgress) onProgress(attempt, status.status);
|
||||||
if (["Finalizata", "Anulata", "Plata refuzata"].includes(status.status)) {
|
if (["Finalizata", "Anulata", "Plata refuzata"].includes(status.status)) {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
} catch (err) {
|
||||||
|
// A transient ANCPI error (5xx, timeout) on ONE poll must not abort
|
||||||
|
// the whole batch — the order is paid and still being processed.
|
||||||
|
// Log and try again on the next cycle.
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
console.warn(
|
||||||
|
`[epay] poll ${attempt}/${POLL_MAX_ATTEMPTS} for order ${orderId} errored (${msg}); continuing`,
|
||||||
|
);
|
||||||
|
}
|
||||||
await sleep(POLL_INTERVAL_MS);
|
await sleep(POLL_INTERVAL_MS);
|
||||||
}
|
}
|
||||||
throw new Error(`ePay order ${orderId} timed out after ${POLL_MAX_ATTEMPTS} poll attempts`);
|
throw new Error(`ePay order ${orderId} timed out after ${POLL_MAX_ATTEMPTS} poll attempts`);
|
||||||
@@ -793,29 +806,57 @@ export class EpayClient {
|
|||||||
|
|
||||||
async downloadDocument(idDocument: number, typeD = 4): Promise<Buffer> {
|
async downloadDocument(idDocument: number, typeD = 4): Promise<Buffer> {
|
||||||
const url = `${BASE_URL}/DownloadFile.action?typeD=${typeD}&id=${idDocument}&source=&browser=chrome`;
|
const url = `${BASE_URL}/DownloadFile.action?typeD=${typeD}&id=${idDocument}&source=&browser=chrome`;
|
||||||
// Angular sends Content-Type: application/pdf in the REQUEST
|
let lastErr = "unknown";
|
||||||
|
|
||||||
|
// ANCPI's DownloadFile occasionally returns a transient 5xx / times out /
|
||||||
|
// hands back an error page even when the order is finalized (2026-06-05:
|
||||||
|
// 327649 got one 500, then succeeded on the very next attempt). The
|
||||||
|
// download is idempotent, so retry transient failures with backoff before
|
||||||
|
// giving up. A 4xx is treated as permanent (stop immediately).
|
||||||
|
for (let attempt = 1; attempt <= DOWNLOAD_MAX_ATTEMPTS; attempt++) {
|
||||||
|
try {
|
||||||
const response = await this.client.post(url, null, {
|
const response = await this.client.post(url, null, {
|
||||||
headers: { "Content-Type": "application/pdf" },
|
headers: { "Content-Type": "application/pdf" },
|
||||||
timeout: DEFAULT_TIMEOUT_MS,
|
timeout: DEFAULT_TIMEOUT_MS,
|
||||||
responseType: "arraybuffer",
|
responseType: "arraybuffer",
|
||||||
|
validateStatus: () => true, // inspect status ourselves for retry
|
||||||
});
|
});
|
||||||
|
|
||||||
const data = response.data;
|
if (response.status >= 400) {
|
||||||
if (!data || data.length < 100) {
|
lastErr = `HTTP ${response.status}`;
|
||||||
throw new Error(`ePay download empty (${data?.length ?? 0} bytes)`);
|
if (response.status < 500) break; // client error — won't fix on retry
|
||||||
}
|
} else {
|
||||||
const buf = Buffer.from(data);
|
const buf = Buffer.from(response.data ?? Buffer.alloc(0));
|
||||||
// R2: if the ePay session expired mid-batch, DownloadFile returns the
|
if (buf.length < 100) {
|
||||||
// login/error HTML page (200 OK) instead of the PDF. Storing that as a
|
lastErr = `empty (${buf.length} bytes)`;
|
||||||
// ".pdf" silently corrupts the extract. Assert the PDF magic bytes.
|
} else if (buf.subarray(0, 5).toString("latin1") !== "%PDF-") {
|
||||||
if (buf.subarray(0, 5).toString("latin1") !== "%PDF-") {
|
// Not a PDF — usually a transient ANCPI error page or an expired
|
||||||
const head = buf.subarray(0, 64).toString("latin1");
|
// session. Retry; a fresh attempt often returns the real PDF.
|
||||||
throw new Error(
|
const head = buf.subarray(0, 48).toString("latin1").replace(/\s+/g, " ");
|
||||||
`ePay download not a PDF (idDocument=${idDocument}, ${buf.length} bytes, head="${head.replace(/\s+/g, " ").slice(0, 40)}") — session may have expired`,
|
lastErr = `not a PDF (head="${head.slice(0, 40)}")`;
|
||||||
);
|
} else {
|
||||||
|
if (attempt > 1) {
|
||||||
|
console.log(`[epay] download ${idDocument} recovered on attempt ${attempt}`);
|
||||||
}
|
}
|
||||||
console.log(`[epay] Downloaded document ${idDocument}: ${buf.length} bytes`);
|
console.log(`[epay] Downloaded document ${idDocument}: ${buf.length} bytes`);
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
// Network error / timeout — retryable.
|
||||||
|
lastErr = err instanceof Error ? err.message : String(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (attempt < DOWNLOAD_MAX_ATTEMPTS) {
|
||||||
|
console.warn(
|
||||||
|
`[epay] download ${idDocument} attempt ${attempt} failed (${lastErr}); retrying in ${DOWNLOAD_RETRY_DELAY_MS * attempt}ms`,
|
||||||
|
);
|
||||||
|
await sleep(DOWNLOAD_RETRY_DELAY_MS * attempt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new Error(
|
||||||
|
`ePay download failed after ${DOWNLOAD_MAX_ATTEMPTS} attempts (idDocument=${idDocument}): ${lastErr}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -574,10 +574,10 @@ async function finalizeOrder(
|
|||||||
plans.push({ item, doc, matchedByIndex, index: next });
|
plans.push({ item, doc, matchedByIndex, index: next });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 6: download + store in parallel (bounded). Each task is fully
|
// One plan's download + store. Returns true on success. On failure it
|
||||||
// self-contained so a failure on one row doesn't abort the others. The
|
// marks the row failed and returns false so the caller can retry it.
|
||||||
// file index is pre-allocated above, so parallel stores never overwrite.
|
const downloadAndStore = async (plan: Plan): Promise<boolean> => {
|
||||||
await runWithConcurrency(plans, DOWNLOAD_CONCURRENCY, async ({ item, doc, matchedByIndex, index: fileIndex }) => {
|
const { item, doc, matchedByIndex, index: fileIndex } = plan;
|
||||||
try {
|
try {
|
||||||
await updateStatus(item.extractId, "downloading", {
|
await updateStatus(item.extractId, "downloading", {
|
||||||
idDocument: doc.idDocument,
|
idDocument: doc.idDocument,
|
||||||
@@ -629,15 +629,43 @@ async function finalizeOrder(
|
|||||||
console.log(
|
console.log(
|
||||||
`[epay-queue] ${matchedByIndex ? "Review" : "Completed"}: ${item.input.nrCadastral} → ${path}`,
|
`[epay-queue] ${matchedByIndex ? "Review" : "Completed"}: ${item.input.nrCadastral} → ${path}`,
|
||||||
);
|
);
|
||||||
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const message =
|
const message =
|
||||||
error instanceof Error ? error.message : "Eroare download/stocare";
|
error instanceof Error ? error.message : "Eroare download/stocare";
|
||||||
await updateStatus(item.extractId, "failed", {
|
await updateStatus(item.extractId, "failed", {
|
||||||
errorMessage: message,
|
errorMessage: message,
|
||||||
});
|
});
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Step 6: download + store in parallel (bounded). Each task is fully
|
||||||
|
// self-contained so a failure on one row doesn't abort the others. The
|
||||||
|
// file index is pre-allocated above, so parallel stores never overwrite.
|
||||||
|
// downloadDocument already retries transient ANCPI errors per call; this
|
||||||
|
// adds a SECOND layer — a final sweep that re-attempts any row still
|
||||||
|
// failed (covers a longer ANCPI blip or a MinIO hiccup) with no new
|
||||||
|
// charge, since the order is already paid.
|
||||||
|
const failed: Plan[] = [];
|
||||||
|
await runWithConcurrency(plans, DOWNLOAD_CONCURRENCY, async (plan) => {
|
||||||
|
const ok = await downloadAndStore(plan);
|
||||||
|
if (!ok) failed.push(plan);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (failed.length > 0) {
|
||||||
|
console.warn(
|
||||||
|
`[epay-queue] ${failed.length}/${plans.length} downloads failed for order ${orderId} — retry sweep in 5s...`,
|
||||||
|
);
|
||||||
|
await new Promise((r) => setTimeout(r, 5000));
|
||||||
|
for (const plan of failed) {
|
||||||
|
const ok = await downloadAndStore(plan);
|
||||||
|
console.log(
|
||||||
|
`[epay-queue] retry sweep ${plan.item.input.nrCadastral}: ${ok ? "recovered" : "still failed"}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Update credits after successful order
|
// Update credits after successful order
|
||||||
const newCredits = await client.getCredits();
|
const newCredits = await client.getCredits();
|
||||||
updateEpayCredits(newCredits);
|
updateEpayCredits(newCredits);
|
||||||
|
|||||||
Reference in New Issue
Block a user