feat(epay): three layers of download/poll resilience
After order 327649 hit a transient ANCPI HTTP 500 on download (it succeeded immediately on manual retry), make the pipeline self-heal instead of marking the row failed: 1. downloadDocument retries transient failures (5xx, network/timeout, empty body, non-PDF error page) up to 4 attempts with linear backoff (3/6/9s); a 4xx is permanent and stops immediately. The %PDF guard stays — a bad body is now retried rather than thrown on the first try. 2. pollUntilComplete tolerates a transient error on a single poll: it logs and continues to the next cycle instead of throwing out of the whole batch (one ANCPI blip during polling no longer fails a paid order). 3. finalizeOrder runs a final retry sweep: any row still failed after the parallel pass is re-attempted once more after a short pause (covers a longer ANCPI blip or a MinIO hiccup). No new charge — the order is already paid. Same downloadDocument + pollUntilComplete hardening ported to eterra-live. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -51,6 +51,9 @@ const POLL_MAX_ATTEMPTS = 40;
|
||||
// ShowOrderDetails page size — large enough to fetch any realistic batch in
|
||||
// one request (see getOrderStatus / QW4).
|
||||
const ORDER_PAGE_SIZE = 50;
|
||||
// Document download retry (transient ANCPI 5xx / timeout / error-page).
|
||||
const DOWNLOAD_MAX_ATTEMPTS = 4;
|
||||
const DOWNLOAD_RETRY_DELAY_MS = 3_000; // linear backoff: 3s, 6s, 9s
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Session cache */
|
||||
@@ -779,11 +782,21 @@ export class EpayClient {
|
||||
onProgress?: (attempt: number, status: string) => void,
|
||||
): Promise<EpayOrderStatus> {
|
||||
for (let attempt = 1; attempt <= POLL_MAX_ATTEMPTS; attempt++) {
|
||||
try {
|
||||
const status = await this.getOrderStatus(orderId);
|
||||
if (onProgress) onProgress(attempt, status.status);
|
||||
if (["Finalizata", "Anulata", "Plata refuzata"].includes(status.status)) {
|
||||
return status;
|
||||
}
|
||||
} catch (err) {
|
||||
// A transient ANCPI error (5xx, timeout) on ONE poll must not abort
|
||||
// the whole batch — the order is paid and still being processed.
|
||||
// Log and try again on the next cycle.
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
console.warn(
|
||||
`[epay] poll ${attempt}/${POLL_MAX_ATTEMPTS} for order ${orderId} errored (${msg}); continuing`,
|
||||
);
|
||||
}
|
||||
await sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
throw new Error(`ePay order ${orderId} timed out after ${POLL_MAX_ATTEMPTS} poll attempts`);
|
||||
@@ -793,29 +806,57 @@ export class EpayClient {
|
||||
|
||||
/**
 * Download one ePay document and return its bytes, retrying transient failures.
 *
 * Each attempt POSTs to DownloadFile.action and classifies the outcome:
 *  - HTTP 4xx            → permanent; stop immediately, no further attempts.
 *  - HTTP 5xx            → retryable.
 *  - body under 100 bytes → retryable ("empty").
 *  - body not starting with "%PDF-" → retryable (per the original notes this is
 *    usually a transient ANCPI error page or an expired-session login page;
 *    storing it as a ".pdf" would silently corrupt the extract).
 *  - thrown error (network / timeout) → retryable.
 *
 * Between attempts the method sleeps DOWNLOAD_RETRY_DELAY_MS * attempt
 * (linear backoff). At most DOWNLOAD_MAX_ATTEMPTS attempts are made.
 *
 * @param idDocument ePay document id, sent as the `id` query parameter.
 * @param typeD      Value of the `typeD` query parameter (defaults to 4).
 * @returns The downloaded body as a Buffer; guaranteed to start with "%PDF-".
 * @throws Error when a 4xx is received, or when every attempt failed. The
 *         message carries the last observed failure reason.
 */
async downloadDocument(idDocument: number, typeD = 4): Promise<Buffer> {
  const url = `${BASE_URL}/DownloadFile.action?typeD=${typeD}&id=${idDocument}&source=&browser=chrome`;
  let lastErr = "unknown";
  let attemptsMade = 0;
  // Set when the server answers 4xx: retrying cannot fix a client error.
  let permanent = false;

  // ANCPI's DownloadFile occasionally returns a transient 5xx / times out /
  // hands back an error page even when the order is finalized (2026-06-05:
  // 327649 got one 500, then succeeded on the very next attempt). The
  // download is idempotent, so retry transient failures with backoff before
  // giving up.
  for (let attempt = 1; attempt <= DOWNLOAD_MAX_ATTEMPTS; attempt++) {
    attemptsMade = attempt;
    try {
      const response = await this.client.post(url, null, {
        // Angular sends Content-Type: application/pdf in the REQUEST
        headers: { "Content-Type": "application/pdf" },
        timeout: DEFAULT_TIMEOUT_MS,
        responseType: "arraybuffer",
        validateStatus: () => true, // inspect status ourselves for retry
      });

      if (response.status >= 400) {
        lastErr = `HTTP ${response.status}`;
        permanent = response.status < 500; // client error — won't fix on retry
      } else {
        const buf = Buffer.from(response.data ?? Buffer.alloc(0));
        if (buf.length < 100) {
          lastErr = `empty (${buf.length} bytes)`;
        } else if (buf.subarray(0, 5).toString("latin1") !== "%PDF-") {
          // Not a PDF — usually a transient ANCPI error page or an expired
          // session. Retry; a fresh attempt often returns the real PDF.
          const head = buf.subarray(0, 48).toString("latin1").replace(/\s+/g, " ");
          lastErr = `not a PDF (head="${head.slice(0, 40)}")`;
        } else {
          if (attempt > 1) {
            console.log(`[epay] download ${idDocument} recovered on attempt ${attempt}`);
          }
          console.log(`[epay] Downloaded document ${idDocument}: ${buf.length} bytes`);
          return buf;
        }
      }
    } catch (err) {
      // Network error / timeout — retryable.
      lastErr = err instanceof Error ? err.message : String(err);
    }

    if (permanent) break;

    if (attempt < DOWNLOAD_MAX_ATTEMPTS) {
      console.warn(
        `[epay] download ${idDocument} attempt ${attempt} failed (${lastErr}); retrying in ${DOWNLOAD_RETRY_DELAY_MS * attempt}ms`,
      );
      await sleep(DOWNLOAD_RETRY_DELAY_MS * attempt);
    }
  }

  if (permanent) {
    // Report the real attempt number: a 4xx stops the loop early, so claiming
    // "after DOWNLOAD_MAX_ATTEMPTS attempts" here would be false.
    throw new Error(
      `ePay download failed on attempt ${attemptsMade} with a non-retryable client error (idDocument=${idDocument}): ${lastErr}`,
    );
  }
  throw new Error(
    `ePay download failed after ${DOWNLOAD_MAX_ATTEMPTS} attempts (idDocument=${idDocument}): ${lastErr}`,
  );
}
|
||||
|
||||
}
|
||||
|
||||
@@ -574,10 +574,10 @@ async function finalizeOrder(
|
||||
plans.push({ item, doc, matchedByIndex, index: next });
|
||||
}
|
||||
|
||||
// Step 6: download + store in parallel (bounded). Each task is fully
|
||||
// self-contained so a failure on one row doesn't abort the others. The
|
||||
// file index is pre-allocated above, so parallel stores never overwrite.
|
||||
await runWithConcurrency(plans, DOWNLOAD_CONCURRENCY, async ({ item, doc, matchedByIndex, index: fileIndex }) => {
|
||||
// One plan's download + store. Returns true on success. On failure it
|
||||
// marks the row failed and returns false so the caller can retry it.
|
||||
const downloadAndStore = async (plan: Plan): Promise<boolean> => {
|
||||
const { item, doc, matchedByIndex, index: fileIndex } = plan;
|
||||
try {
|
||||
await updateStatus(item.extractId, "downloading", {
|
||||
idDocument: doc.idDocument,
|
||||
@@ -629,15 +629,43 @@ async function finalizeOrder(
|
||||
console.log(
|
||||
`[epay-queue] ${matchedByIndex ? "Review" : "Completed"}: ${item.input.nrCadastral} → ${path}`,
|
||||
);
|
||||
return true;
|
||||
} catch (error) {
|
||||
const message =
|
||||
error instanceof Error ? error.message : "Eroare download/stocare";
|
||||
await updateStatus(item.extractId, "failed", {
|
||||
errorMessage: message,
|
||||
});
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Step 6: download + store in parallel (bounded). Each task is fully
|
||||
// self-contained so a failure on one row doesn't abort the others. The
|
||||
// file index is pre-allocated above, so parallel stores never overwrite.
|
||||
// downloadDocument already retries transient ANCPI errors per call; this
|
||||
// adds a SECOND layer — a final sweep that re-attempts any row still
|
||||
// failed (covers a longer ANCPI blip or a MinIO hiccup) with no new
|
||||
// charge, since the order is already paid.
|
||||
const failed: Plan[] = [];
|
||||
await runWithConcurrency(plans, DOWNLOAD_CONCURRENCY, async (plan) => {
|
||||
const ok = await downloadAndStore(plan);
|
||||
if (!ok) failed.push(plan);
|
||||
});
|
||||
|
||||
if (failed.length > 0) {
|
||||
console.warn(
|
||||
`[epay-queue] ${failed.length}/${plans.length} downloads failed for order ${orderId} — retry sweep in 5s...`,
|
||||
);
|
||||
await new Promise((r) => setTimeout(r, 5000));
|
||||
for (const plan of failed) {
|
||||
const ok = await downloadAndStore(plan);
|
||||
console.log(
|
||||
`[epay-queue] retry sweep ${plan.item.input.nrCadastral}: ${ok ? "recovered" : "still failed"}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Update credits after successful order
|
||||
const newCredits = await client.getCredits();
|
||||
updateEpayCredits(newCredits);
|
||||
|
||||
Reference in New Issue
Block a user