#!/bin/bash # Imports MFP non-WEB_UU/BL_BS_SL financial categories into separate tables. # Currently handles WEB_ONG (46 indicators, NGO-specific) and WEB_Inst_de_credit # (23 IFRS indicators for banks). Other small categories (IFN, ASIG, BROK, SIF, # PENSII, VS, VM, IP_IEME, IR, FOND_GARANTARE) can follow the same pattern with # their own tables; for now we treat them as future work since each is <1MB # and < a few hundred records. # # Discovers download URLs via data.gov.ro CKAN API per data year. # # Idempotent. ON CONFLICT (cui, year) DO UPDATE so re-runs refresh latest values. set -uo pipefail DATA_DIR=/opt/vreaudigital/data/mfinante LOG=/var/log/vreaudigital-fin-import.log log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG"; } mkdir -p "$DATA_DIR" # ── DB env (unchanged from import-financials.sh pattern) ── source /opt/vreaudigital/.infisical-mi TOKEN=$(infisical login --method=universal-auth --domain="$INFISICAL_API_URL" \ --client-id="$INFISICAL_CLIENT_ID" --client-secret="$INFISICAL_CLIENT_SECRET" \ --silent --plain) DBURL=$(infisical run --domain="$INFISICAL_API_URL" \ --projectId="$INFISICAL_PROJECT_ID" --env="$INFISICAL_ENV" \ --path="$INFISICAL_PATH" --silent --token="$TOKEN" \ -- sh -c 'echo "$DATABASE_URL"') DB=$(echo "$DBURL" | sed -E 's/[?&]schema=[^&]*//; s/\?$//') export PGUSER=$(echo "$DB" | sed -E 's|^postgresql://([^:]+):.*|\1|') export PGPASSWORD=$(echo "$DB" | sed -E 's|^postgresql://[^:]+:([^@]+)@.*|\1|') export PGHOST=$(echo "$DB" | sed -E 's|^postgresql://[^@]+@([^:/]+).*|\1|') export PGPORT=$(echo "$DB" | sed -E 's|^postgresql://[^@]+@[^:]+:([0-9]+)/.*|\1|') export PGDATABASE=$(echo "$DB" | sed -E 's|^postgresql://[^@]+@[^/]+/([^?]+).*|\1|') unset DBURL TOKEN DB log "=== ONG + Banks import started ===" # Apply schema if not present. psql -v ON_ERROR_STOP=1 -f /opt/vreaudigital/services/seap-scraper/sql/016_firms_financials_categories.sql >/dev/null # Helper: discover CSV URL via CKAN. Slug per data year, file pattern per category. discover_url() { local year="$1" local pattern="$2" # e.g. "web_ong_an" or "web_instit_de_credit_an" or "web_inst_de_credit_" local slug case "$year" in 2015) slug="situatii_financiare_2015" ;; 2016) slug="situatii_financiare_2016" ;; 2017) slug="situatii_financiare_2017" ;; 2018) slug="situatii_financiare_2018" ;; 2019) slug="situatii_financiare_2019" ;; 2020) slug="situatii_financiare_2021" ;; # 2020 data lives in 2021 megadump 2021) slug="situatii_financiare_2021" ;; 2022) slug="situatii_financiare_2022" ;; 2023) slug="situatii_financiare2023" ;; 2024) slug="situatii_financiare_2024" ;; *) echo ""; return 1 ;; esac curl -fsSL --max-time 30 "https://data.gov.ro/api/3/action/package_show?id=$slug" 2>/dev/null \ | python3 -c " import json, sys, re d = json.load(sys.stdin) year = '$year' pat = re.compile(r'$pattern' + year + r'\\.txt\$', re.I) for r in d.get('result', {}).get('resources', []): if pat.search(r.get('name', '')): print(r.get('url', '')); break " } # ─── ONG ────────────────────────────────────────────────────────────────── for YEAR in ${YEARS:-2020 2021 2022 2023 2024}; do FILE="$DATA_DIR/web_ong_${YEAR}.txt" if [ ! -s "$FILE" ]; then URL=$(discover_url "$YEAR" "web_ong_an") if [ -z "$URL" ]; then log "[$YEAR/ONG] URL not found, skipping"; continue; fi log "[$YEAR/ONG] Downloading from $URL ..." curl -fsL --max-time 120 -o "$FILE" "$URL" fi log "[$YEAR/ONG] COPY $FILE ($(stat -c%s "$FILE") bytes)..." psql -v ON_ERROR_STOP=1 -c "TRUNCATE TABLE firms.staging_ong;" psql -v ON_ERROR_STOP=1 <&1 | tee -a "$LOG" psql -At -F"|" -c " SELECT 'bank:' || year, COUNT(*) FROM firms.financials_banks GROUP BY year ORDER BY year;" 2>&1 | tee -a "$LOG" log "=== ONG + Banks import done ==="