#!/bin/bash
# AFIR historical XLSX importer wrapper.
#
# Pipeline (every heavy step runs on the remote host "satra"):
#   1. download a yearly AFIR FEADR/FEGA XLSX directly on satra,
#   2. upload the Python normalizer and convert the XLSX to a pipe-delimited TSV,
#   3. optionally truncate the TSV to the first LIMIT lines (smoke test),
#   4. stage + insert through /tmp/baseline.sh (Infisical-aware psql wrapper),
#   5. remove the remote work directory (skipped for smoke tests).
#
# Design intent of step 4, as stated by the original header: COPY into
# fonduri.staging_afir, then INSERT into fonduri.afir_plati tagged with
# source_year; rows with the matching source_year are deleted before the insert
# so that re-running an import is idempotent (XLSX dumps are stateless
# reflections of the AFIR DB at publication time).
#
# Usage:
#   ./import-afir-historical.sh URL YEAR FUND [LIMIT]
#     URL:   AFIR XLSX direct download URL (must not contain a single quote)
#     YEAR:  4-digit source year, e.g. 2023
#     FUND:  'feadr' or 'fega' (informational; schema is identical)
#     LIMIT: optional non-negative integer; only the first N TSV lines are kept
#
# Example:
#   ./import-afir-historical.sh \
#     'https://www.afir.ro/media/35cm3jdr/listaplati_2023_feadr_actualizata.xlsx' \
#     2023 feadr
#
# Smoke test (1000 rows) -- URL is still mandatory, an empty string is rejected:
#   ./import-afir-historical.sh "$URL" 2023 feadr 1000

set -euo pipefail

URL="${1:?URL required}"
YEAR="${2:?YEAR required}"
FUND="${3:?FUND required (feadr|fega)}"
LIMIT="${4:-}"

# --- argument validation -----------------------------------------------------
# YEAR, FUND and LIMIT are interpolated into remote shell commands below, so
# they are restricted to harmless character sets before any ssh call is made.
if ! [[ "$YEAR" =~ ^20[0-9]{2}$ ]]; then
  echo "[afir-historical] ERROR: YEAR must be 4-digit (got: $YEAR)" >&2
  exit 2
fi
if [[ "$FUND" != "feadr" && "$FUND" != "fega" ]]; then
  echo "[afir-historical] ERROR: FUND must be 'feadr' or 'fega' (got: $FUND)" >&2
  exit 2
fi
if [[ -n "$LIMIT" ]] && ! [[ "$LIMIT" =~ ^[0-9]+$ ]]; then
  echo "[afir-historical] ERROR: LIMIT must be an integer (got: $LIMIT)" >&2
  exit 2
fi
# The URL is wrapped in single quotes for the remote shell; a single quote
# inside it would terminate that quoting and allow arbitrary remote commands.
case "$URL" in
  *"'"*)
    echo "[afir-historical] ERROR: URL must not contain a single quote" >&2
    exit 2
    ;;
esac

# --- work directories --------------------------------------------------------
# Local scratch dir: unpredictable name via mktemp, removed on every exit path.
WORK_LOCAL="$(mktemp -d "${TMPDIR:-/tmp}/afir-historical-XXXXXX")"
cleanup_local() { rm -rf -- "$WORK_LOCAL"; }
trap cleanup_local EXIT

# Remote dir is deterministic per (year, fund) so a smoke-test run can be
# inspected afterwards; both components were validated above.
WORK_REMOTE="/tmp/afir-historical-$YEAR-$FUND"

# NOTE(review): these two paths are not referenced by any step visible in this
# copy of the script; kept in case the truncated step-4 text relies on them.
XLSX_LOCAL="$WORK_LOCAL/listaplati_${YEAR}_${FUND}.xlsx"
TSV_LOCAL="$WORK_LOCAL/listaplati_${YEAR}_${FUND}.tsv"

echo "[afir-historical] === ${YEAR} ${FUND} ==="

# 1. Download. Runs on satra to skip the upload-back-to-server hop -- the XLSX
#    is 30 MB. curl flags: -s silent, -L follow redirects, -f fail on HTTP
#    errors, --max-time 600 caps the transfer at 10 minutes.
#    NOTE(review): -k disables TLS certificate verification; confirm this is
#    still required for the AFIR host.
echo "[afir-historical] downloading on satra..."
ssh satra "mkdir -p $WORK_REMOTE && curl -sLkf --max-time 600 -o $WORK_REMOTE/listaplati.xlsx '$URL' && ls -lh $WORK_REMOTE/listaplati.xlsx"

# 2. Normalize to pipe-delimited TSV using existing python3-openpyxl on satra.
SCRIPT_DIR="$(cd "$(dirname -- "$0")/.." && pwd)/scripts"
echo "[afir-historical] uploading normalizer..."
scp -q "$SCRIPT_DIR/import-afir-historical.py" "satra:$WORK_REMOTE/normalize.py"

echo "[afir-historical] normalizing XLSX → TSV (this takes ~2-5 min for 500K rows)..."
# The normalizer output goes to a log file and only its tail is shown. The exit
# status of python3 is captured and returned explicitly: piping straight into
# `tail` would report tail's status and hide a failed normalization from set -e.
ssh satra "python3 $WORK_REMOTE/normalize.py $WORK_REMOTE/listaplati.xlsx $WORK_REMOTE/data.tsv > $WORK_REMOTE/normalize.log 2>&1; rc=\$?; tail -20 $WORK_REMOTE/normalize.log; exit \$rc"

# 3. Optional smoke-test truncation: keep only the first LIMIT lines.
TSV_REMOTE="$WORK_REMOTE/data.tsv"
if [[ -n "$LIMIT" ]]; then
  echo "[afir-historical] LIMIT=$LIMIT — truncating TSV for smoke test..."
  ssh satra "head -n $LIMIT $WORK_REMOTE/data.tsv > $WORK_REMOTE/data.smoke.tsv && wc -l $WORK_REMOTE/data.smoke.tsv"
  TSV_REMOTE="$WORK_REMOTE/data.smoke.tsv"
fi

# 4. Stage + INSERT on Postgres via /tmp/baseline.sh (Infisical-aware psql wrapper).
# NOTE(review): in the copy of this file under review the command below is
# damaged -- the text between "/tmp/baseline.sh <" and "> 0 THEN" (the here-doc
# opener and the leading SQL statements) is missing. It is reproduced exactly
# as found and NOT reconstructed; restore the original here-doc from version
# control before running this step. TSV_REMOTE is presumably consumed by the
# missing part -- TODO confirm.
echo "[afir-historical] staging + insert..."
ssh satra "/tmp/baseline.sh < 0 THEN 1 END) AS with_feadr, SUM(CASE WHEN fega_total > 0 THEN 1 END) AS with_fega, SUM(ue_total)::bigint AS sum_ue_eur FROM fonduri.afir_plati WHERE source_year = $YEAR; SQL"

# 5. Remote cleanup. Smoke-test runs keep the remote work dir.
if [[ -z "$LIMIT" ]]; then
  echo "[afir-historical] cleaning up remote workdir..."
  ssh satra "rm -rf $WORK_REMOTE"
fi

echo "[afir-historical] === done ($YEAR $FUND) ==="