Files
vreau-digital/services/seap-scraper/cron/import-financials-ong-banks.sh
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

195 lines
9.3 KiB
Bash
Executable File

#!/bin/bash
# Imports MFP non-WEB_UU/BL_BS_SL financial categories into separate tables.
# Currently handles WEB_ONG (46 indicators, NGO-specific) and WEB_Inst_de_credit
# (23 IFRS indicators for banks). Other small categories (IFN, ASIG, BROK, SIF,
# PENSII, VS, VM, IP_IEME, IR, FOND_GARANTARE) can follow the same pattern with
# their own tables; for now we treat them as future work since each is <1MB
# and < a few hundred records.
#
# Discovers download URLs via data.gov.ro CKAN API per data year.
#
# Idempotent. ON CONFLICT (cui, year) DO UPDATE so re-runs refresh latest values.
set -uo pipefail
DATA_DIR=/opt/vreaudigital/data/mfinante
LOG=/var/log/vreaudigital-fin-import.log
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG"; }
mkdir -p "$DATA_DIR"
# ── DB env (unchanged from import-financials.sh pattern) ──
source /opt/vreaudigital/.infisical-mi
TOKEN=$(infisical login --method=universal-auth --domain="$INFISICAL_API_URL" \
--client-id="$INFISICAL_CLIENT_ID" --client-secret="$INFISICAL_CLIENT_SECRET" \
--silent --plain)
DBURL=$(infisical run --domain="$INFISICAL_API_URL" \
--projectId="$INFISICAL_PROJECT_ID" --env="$INFISICAL_ENV" \
--path="$INFISICAL_PATH" --silent --token="$TOKEN" \
-- sh -c 'echo "$DATABASE_URL"')
DB=$(echo "$DBURL" | sed -E 's/[?&]schema=[^&]*//; s/\?$//')
export PGUSER=$(echo "$DB" | sed -E 's|^postgresql://([^:]+):.*|\1|')
export PGPASSWORD=$(echo "$DB" | sed -E 's|^postgresql://[^:]+:([^@]+)@.*|\1|')
export PGHOST=$(echo "$DB" | sed -E 's|^postgresql://[^@]+@([^:/]+).*|\1|')
export PGPORT=$(echo "$DB" | sed -E 's|^postgresql://[^@]+@[^:]+:([0-9]+)/.*|\1|')
export PGDATABASE=$(echo "$DB" | sed -E 's|^postgresql://[^@]+@[^/]+/([^?]+).*|\1|')
unset DBURL TOKEN DB
log "=== ONG + Banks import started ==="
# Apply schema if not present.
psql -v ON_ERROR_STOP=1 -f /opt/vreaudigital/services/seap-scraper/sql/016_firms_financials_categories.sql >/dev/null
# Helper: discover CSV URL via CKAN. Slug per data year, file pattern per category.
discover_url() {
local year="$1"
local pattern="$2" # e.g. "web_ong_an" or "web_instit_de_credit_an" or "web_inst_de_credit_"
local slug
case "$year" in
2015) slug="situatii_financiare_2015" ;;
2016) slug="situatii_financiare_2016" ;;
2017) slug="situatii_financiare_2017" ;;
2018) slug="situatii_financiare_2018" ;;
2019) slug="situatii_financiare_2019" ;;
2020) slug="situatii_financiare_2021" ;; # 2020 data lives in 2021 megadump
2021) slug="situatii_financiare_2021" ;;
2022) slug="situatii_financiare_2022" ;;
2023) slug="situatii_financiare2023" ;;
2024) slug="situatii_financiare_2024" ;;
*) echo ""; return 1 ;;
esac
curl -fsSL --max-time 30 "https://data.gov.ro/api/3/action/package_show?id=$slug" 2>/dev/null \
| python3 -c "
import json, sys, re
d = json.load(sys.stdin)
year = '$year'
pat = re.compile(r'$pattern' + year + r'\\.txt\$', re.I)
for r in d.get('result', {}).get('resources', []):
if pat.search(r.get('name', '')):
print(r.get('url', '')); break
"
}
# ─── ONG ──────────────────────────────────────────────────────────────────
for YEAR in ${YEARS:-2020 2021 2022 2023 2024}; do
FILE="$DATA_DIR/web_ong_${YEAR}.txt"
if [ ! -s "$FILE" ]; then
URL=$(discover_url "$YEAR" "web_ong_an")
if [ -z "$URL" ]; then log "[$YEAR/ONG] URL not found, skipping"; continue; fi
log "[$YEAR/ONG] Downloading from $URL ..."
curl -fsL --max-time 120 -o "$FILE" "$URL"
fi
log "[$YEAR/ONG] COPY $FILE ($(stat -c%s "$FILE") bytes)..."
psql -v ON_ERROR_STOP=1 -c "TRUNCATE TABLE firms.staging_ong;"
psql -v ON_ERROR_STOP=1 <<COPYEOF
\\copy firms.staging_ong (cui, caen, caeno, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17, i18, i19, i20, i21, i22, i23, i24, i25, i26, i27, i28, i29, i30, i31, i32, i33, i34, i35, i36, i37, i38, i39, i40, i41, i42, i43, i44, i45, i46) FROM '$FILE' WITH (FORMAT csv, DELIMITER ',', HEADER true, NULL '');
COPYEOF
log "[$YEAR/ONG] UPSERT into firms.financials_ong..."
psql -v ON_ERROR_STOP=1 <<SQL
INSERT INTO firms.financials_ong (
cui, year, caen, caeno,
capitaluri_proprii, venituri_total, cheltuieli_total, excedent,
personal_neeconomic, personal_economic, indicators
)
SELECT DISTINCT ON (cui)
cui, $YEAR, caen, caeno,
NULLIF(i12, '')::numeric(20,2),
NULLIF(i38, '')::numeric(20,2),
NULLIF(i40, '')::numeric(20,2),
NULLIF(i42, '')::numeric(20,2),
CASE WHEN NULLIF(i45, '') ~ '^[0-9]+\$' AND NULLIF(i45, '')::bigint BETWEEN 0 AND 100000000 THEN i45::bigint ELSE NULL END,
CASE WHEN NULLIF(i46, '') ~ '^[0-9]+\$' AND NULLIF(i46, '')::bigint BETWEEN 0 AND 100000000 THEN i46::bigint ELSE NULL END,
jsonb_strip_nulls(jsonb_build_object(
'i1', NULLIF(i1, ''), 'i2', NULLIF(i2, ''), 'i3', NULLIF(i3, ''), 'i4', NULLIF(i4, ''),
'i5', NULLIF(i5, ''), 'i6', NULLIF(i6, ''), 'i7', NULLIF(i7, ''), 'i8', NULLIF(i8, ''),
'i9', NULLIF(i9, ''), 'i10', NULLIF(i10, ''), 'i11', NULLIF(i11, ''), 'i12', NULLIF(i12, ''),
'i13', NULLIF(i13, ''), 'i14', NULLIF(i14, ''), 'i15', NULLIF(i15, ''), 'i16', NULLIF(i16, ''),
'i17', NULLIF(i17, ''), 'i18', NULLIF(i18, ''), 'i19', NULLIF(i19, ''), 'i20', NULLIF(i20, ''),
'i21', NULLIF(i21, ''), 'i22', NULLIF(i22, ''), 'i23', NULLIF(i23, ''), 'i24', NULLIF(i24, ''),
'i25', NULLIF(i25, ''), 'i26', NULLIF(i26, ''), 'i27', NULLIF(i27, ''), 'i28', NULLIF(i28, ''),
'i29', NULLIF(i29, ''), 'i30', NULLIF(i30, ''), 'i31', NULLIF(i31, ''), 'i32', NULLIF(i32, ''),
'i33', NULLIF(i33, ''), 'i34', NULLIF(i34, ''), 'i35', NULLIF(i35, ''), 'i36', NULLIF(i36, ''),
'i37', NULLIF(i37, ''), 'i38', NULLIF(i38, ''), 'i39', NULLIF(i39, ''), 'i40', NULLIF(i40, ''),
'i41', NULLIF(i41, ''), 'i42', NULLIF(i42, ''), 'i43', NULLIF(i43, ''), 'i44', NULLIF(i44, ''),
'i45', NULLIF(i45, ''), 'i46', NULLIF(i46, '')
))
FROM firms.staging_ong
WHERE cui IS NOT NULL AND cui != '' AND cui != '0'
ORDER BY cui
ON CONFLICT (cui, year) DO UPDATE SET
caen = EXCLUDED.caen,
caeno = EXCLUDED.caeno,
capitaluri_proprii = EXCLUDED.capitaluri_proprii,
venituri_total = EXCLUDED.venituri_total,
cheltuieli_total = EXCLUDED.cheltuieli_total,
excedent = EXCLUDED.excedent,
personal_neeconomic = EXCLUDED.personal_neeconomic,
personal_economic = EXCLUDED.personal_economic,
indicators = EXCLUDED.indicators,
fetched_at = now();
SQL
done
# ─── BĂNCI / Instituții de Credit ─────────────────────────────────────────
for YEAR in ${YEARS:-2020 2021 2022 2023 2024}; do
FILE="$DATA_DIR/web_inst_de_credit_${YEAR}.txt"
if [ ! -s "$FILE" ]; then
# Filename differs per year — sometimes web_instit_de_credit_an, sometimes web_inst_de_credit_
URL=$(discover_url "$YEAR" "web_(inst|instit)_de_credit_(an)?")
if [ -z "$URL" ]; then log "[$YEAR/BANK] URL not found, skipping"; continue; fi
log "[$YEAR/BANK] Downloading from $URL ..."
curl -fsL --max-time 60 -o "$FILE" "$URL"
fi
log "[$YEAR/BANK] COPY $FILE ($(stat -c%s "$FILE") bytes)..."
psql -v ON_ERROR_STOP=1 -c "TRUNCATE TABLE firms.staging_banks;"
psql -v ON_ERROR_STOP=1 <<COPYEOF
\\copy firms.staging_banks (cui, caen, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17, i18, i19, i20, i21, i22, i23) FROM '$FILE' WITH (FORMAT csv, DELIMITER ',', HEADER true, NULL '');
COPYEOF
log "[$YEAR/BANK] UPSERT into firms.financials_banks..."
psql -v ON_ERROR_STOP=1 <<SQL
INSERT INTO firms.financials_banks (
cui, year, caen,
active_financiare_amortiz, capital_social, profit_exercitiu,
profit_inainte_impozit, cifra_afaceri, indicators
)
SELECT DISTINCT ON (cui)
cui, $YEAR, caen,
NULLIF(i6, '')::numeric(20,2),
NULLIF(i14, '')::numeric(20,2),
NULLIF(i22, '')::numeric(20,2),
NULLIF(i19, '')::numeric(20,2),
NULLIF(i23, '')::numeric(20,2),
jsonb_strip_nulls(jsonb_build_object(
'i1', NULLIF(i1, ''), 'i2', NULLIF(i2, ''), 'i3', NULLIF(i3, ''), 'i4', NULLIF(i4, ''),
'i5', NULLIF(i5, ''), 'i6', NULLIF(i6, ''), 'i7', NULLIF(i7, ''), 'i8', NULLIF(i8, ''),
'i9', NULLIF(i9, ''), 'i10', NULLIF(i10, ''), 'i11', NULLIF(i11, ''), 'i12', NULLIF(i12, ''),
'i13', NULLIF(i13, ''), 'i14', NULLIF(i14, ''), 'i15', NULLIF(i15, ''), 'i16', NULLIF(i16, ''),
'i17', NULLIF(i17, ''), 'i18', NULLIF(i18, ''), 'i19', NULLIF(i19, ''), 'i20', NULLIF(i20, ''),
'i21', NULLIF(i21, ''), 'i22', NULLIF(i22, ''), 'i23', NULLIF(i23, '')
))
FROM firms.staging_banks
WHERE cui IS NOT NULL AND cui != '' AND cui != '0'
ORDER BY cui
ON CONFLICT (cui, year) DO UPDATE SET
caen = EXCLUDED.caen,
active_financiare_amortiz = EXCLUDED.active_financiare_amortiz,
capital_social = EXCLUDED.capital_social,
profit_exercitiu = EXCLUDED.profit_exercitiu,
profit_inainte_impozit = EXCLUDED.profit_inainte_impozit,
cifra_afaceri = EXCLUDED.cifra_afaceri,
indicators = EXCLUDED.indicators,
fetched_at = now();
SQL
done
log "=== ONG + Banks final stats ==="
psql -At -F"|" -c "
SELECT 'ong:' || year, COUNT(*) FROM firms.financials_ong GROUP BY year ORDER BY year;" 2>&1 | tee -a "$LOG"
psql -At -F"|" -c "
SELECT 'bank:' || year, COUNT(*) FROM firms.financials_banks GROUP BY year ORDER BY year;" 2>&1 | tee -a "$LOG"
log "=== ONG + Banks import done ==="