Files
vreau-digital/services/seap-scraper/cron/import-afir-historical.sh
T
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

133 lines
5.1 KiB
Bash
Executable File

#!/bin/bash
# AFIR historical XLSX importer wrapper.
#
# Downloads a yearly AFIR FEADR/FEGA XLSX, normalizes to pipe-TSV, ships to
# satra, COPYs into fonduri.staging_afir, then INSERTs into fonduri.afir_plati
# with source_year tagging.
#
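# Idempotent on full runs: rows with the matching source_year are deleted
# before insert (XLSX dumps are stateless reflections of AFIR DB at
# publication time). The delete keys on source_year only (not on FUND), and
# LIMIT smoke-test runs skip the delete entirely, so they append rows.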
#
# Usage:
# ./import-afir-historical.sh URL YEAR FUND [LIMIT]
# URL: AFIR XLSX direct download URL
# YEAR: 4-digit source year, e.g. 2023
# FUND: 'feadr' or 'fega' (informational; schema is identical)
# LIMIT: optional integer — only insert first N rows (smoke test)
#
# Example:
# ./import-afir-historical.sh \
# 'https://www.afir.ro/media/35cm3jdr/listaplati_2023_feadr_actualizata.xlsx' \
# 2023 feadr
#
# Smoke test (1000 rows):
# ./import-afir-historical.sh '<url>' 2023 feadr 1000
# Strict mode: abort on any failed command, unset variable, or failed
# pipeline stage.
set -euo pipefail

# Positional arguments (see the usage header). The first three are mandatory;
# ${N:?msg} aborts with msg when the argument is missing or empty.
URL="${1:?URL required}"
YEAR="${2:?YEAR required}"
FUND="${3:?FUND required (feadr|fega)}"
LIMIT="${4:-}"

# URL is later embedded inside a single-quoted word of a remote shell command.
# A literal single quote would end that quoting and let the remainder of the
# URL be interpreted as shell on satra, so refuse it up front.
if [[ "$URL" == *"'"* ]]; then
  echo "[afir-historical] ERROR: URL must not contain a single quote" >&2
  exit 2
fi

# YEAR is spliced into SQL and into remote paths; only 2000-2099 is accepted.
if ! [[ "$YEAR" =~ ^20[0-9]{2}$ ]]; then
  echo "[afir-historical] ERROR: YEAR must be 4-digit (got: $YEAR)" >&2
  exit 2
fi

# FUND is spliced into remote paths and the summary label.
if [[ "$FUND" != "feadr" && "$FUND" != "fega" ]]; then
  echo "[afir-historical] ERROR: FUND must be 'feadr' or 'fega' (got: $FUND)" >&2
  exit 2
fi

# LIMIT is optional, but when given it is passed unquoted to `head -n` inside
# a remote shell command, so it must be a plain decimal integer.
if [[ -n "$LIMIT" && ! "$LIMIT" =~ ^[0-9]+$ ]]; then
  echo "[afir-historical] ERROR: LIMIT must be a non-negative integer (got: $LIMIT)" >&2
  exit 2
fi
# Remote scratch directory on satra; deterministic per (YEAR, FUND) so a
# rerun for the same pair reuses the same path.
WORK_REMOTE="/tmp/afir-historical-$YEAR-$FUND"

# Local scratch directory. mktemp -d picks an unpredictable name and creates
# the directory atomically, instead of trusting a guessable /tmp/...-PID path.
WORK_LOCAL="$(mktemp -d /tmp/afir-historical-XXXXXX)" || {
  echo "[afir-historical] ERROR: cannot create local workdir" >&2
  exit 1
}

# Remove the local scratch directory on every exit path. Using a function
# means the path is expanded (and quoted) when the trap fires, not when it
# is installed.
cleanup_local_workdir() {
  rm -rf -- "${WORK_LOCAL:?}"
}
trap cleanup_local_workdir EXIT

echo "[afir-historical] === ${YEAR} ${FUND} ==="
# 1. Download directly on satra to skip the upload-back-to-server hop — the
# XLSX is 30 MB. curl flags: -s silent, -L follow redirects, -f fail on HTTP
# errors, --max-time 600 caps the transfer at 10 minutes.
# NOTE(review): -k disables TLS certificate verification; confirm it is still
# required for the AFIR host.
# URL is placed inside single quotes in the remote command line, so it must
# not itself contain a single quote.
echo "[afir-historical] downloading on satra..."
ssh satra "mkdir -p $WORK_REMOTE && curl -sLkf --max-time 600 -o $WORK_REMOTE/listaplati.xlsx '$URL' && ls -lh $WORK_REMOTE/listaplati.xlsx"

# 2. Normalize to pipe-delimited TSV using existing python3-openpyxl on satra.
# The normalizer lives in ../scripts relative to this wrapper.
SCRIPT_DIR="$(cd "$(dirname "$0")/.." && pwd)/scripts"
echo "[afir-historical] uploading normalizer..."
scp -q "$SCRIPT_DIR/import-afir-historical.py" "satra:$WORK_REMOTE/normalize.py"

echo "[afir-historical] normalizing XLSX → TSV (this takes ~2-5 min for 500K rows)..."
# The normalizer output is captured to a log and the last 20 lines are shown.
# The remote command exits with python's own status: piping straight into
# `tail` would report tail's status instead and hide a failed normalization.
ssh satra "python3 $WORK_REMOTE/normalize.py $WORK_REMOTE/listaplati.xlsx $WORK_REMOTE/data.tsv > $WORK_REMOTE/normalize.log 2>&1; rc=\$?; tail -n 20 $WORK_REMOTE/normalize.log; exit \$rc"

# 3. Optional smoke-test truncation: keep only the first LIMIT lines and point
# TSV_REMOTE (consumed by the staging step) at the truncated copy.
TSV_REMOTE="$WORK_REMOTE/data.tsv"
if [[ -n "$LIMIT" ]]; then
  echo "[afir-historical] LIMIT=$LIMIT — truncating TSV for smoke test..."
  ssh satra "head -n $LIMIT $WORK_REMOTE/data.tsv > $WORK_REMOTE/data.smoke.tsv && wc -l $WORK_REMOTE/data.smoke.tsv"
  TSV_REMOTE="$WORK_REMOTE/data.smoke.tsv"
fi
# 4. Stage + INSERT on Postgres via /tmp/baseline.sh (Infisical-aware psql wrapper).
#
# The SQL is sent as a heredoc evaluated by the remote shell. Because the
# whole remote command sits in local double quotes, $VARS are expanded
# locally and "\\" becomes the single backslash that psql meta-commands need.
echo "[afir-historical] staging + insert..."

# 4a. Reload the staging table from the normalized TSV. ON_ERROR_STOP makes
# psql abort on the first failing statement.
ssh satra "/tmp/baseline.sh <<SQL
\\set ON_ERROR_STOP on
TRUNCATE TABLE fonduri.staging_afir;
\\copy fonduri.staging_afir (beneficiar_name, last_name, mama_cui, localitate, cod_masura, obiectiv, data_start, data_end, fega_op, fega_total, feadr_op, feadr_total, op_amount, cofinantare, ue_total) FROM '$TSV_REMOTE' WITH (FORMAT text, DELIMITER '|', NULL '')
SELECT 'staging_loaded' AS step, COUNT(*) AS rows FROM fonduri.staging_afir;
SQL"

# 4b. Idempotency: on a full run (LIMIT empty) the rows already stored for
# this source_year are dropped before the reinsert. A LIMIT smoke test skips
# the delete and only appends.
# NOTE(review): the delete keys on source_year alone, not on FUND. If the
# feadr and fega files for one year are imported separately, the second
# import removes the first one's rows — confirm against how the yearly
# XLSX files are published.
delete_sql=""
if [[ -z "$LIMIT" ]]; then
  echo "[afir-historical] full run — deleting prior rows for source_year=$YEAR..."
  delete_sql="DELETE FROM fonduri.afir_plati WHERE source_year = $YEAR;"
fi

# 4c. DELETE (when requested) and INSERT share one transaction, so a failed
# INSERT — e.g. a value that does not cast to numeric — rolls the delete
# back instead of leaving the year empty. The summary query runs after COMMIT.
ssh satra "/tmp/baseline.sh <<SQL
\\set ON_ERROR_STOP on
BEGIN;
$delete_sql
INSERT INTO fonduri.afir_plati (
  source_year, beneficiar_name, last_name, mama_cui, localitate,
  cod_masura, obiectiv, data_start, data_end,
  fega_op, fega_total, feadr_op, feadr_total,
  op_amount, cofinantare, ue_total
)
SELECT
  $YEAR,
  beneficiar_name, NULLIF(last_name, ''), NULLIF(mama_cui, ''), NULLIF(localitate, ''),
  NULLIF(cod_masura, ''), NULLIF(obiectiv, ''), NULLIF(data_start, ''), NULLIF(data_end, ''),
  NULLIF(fega_op, '')::numeric,
  NULLIF(fega_total, '')::numeric,
  NULLIF(feadr_op, '')::numeric,
  NULLIF(feadr_total, '')::numeric,
  NULLIF(op_amount, '')::numeric,
  NULLIF(cofinantare, '')::numeric,
  NULLIF(ue_total, '')::numeric
FROM fonduri.staging_afir;
COMMIT;
SELECT '$YEAR-$FUND' AS run,
  COUNT(*) AS rows_inserted,
  COUNT(DISTINCT beneficiar_name) AS distinct_beneficiars,
  SUM(CASE WHEN feadr_total > 0 THEN 1 END) AS with_feadr,
  SUM(CASE WHEN fega_total > 0 THEN 1 END) AS with_fega,
  SUM(ue_total)::bigint AS sum_ue_eur
FROM fonduri.afir_plati WHERE source_year = $YEAR;
SQL"
# A full import removes the scratch directory on satra; a LIMIT smoke-test
# run leaves it in place.
if [[ -z "${LIMIT}" ]]; then
  printf '%s\n' "[afir-historical] cleaning up remote workdir..."
  ssh satra "rm -rf ${WORK_REMOTE}"
fi

# Final banner marking the end of the run.
printf '%s\n' "[afir-historical] === done (${YEAR} ${FUND}) ==="