initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
+108
@@ -0,0 +1,108 @@
|
||||
#!/bin/bash
# Import financial indicators (Situații financiare) from data.gov.ro per year.
# Runs COPY from web_uu_YYYY.txt → staging_financials → firms.financials (PK cui+year).
#
# Environment:
#   YEARS  optional, whitespace-separated list of years to import.

# Abort on the first failing command, unset variable, or failing pipeline stage.
set -euo pipefail

# Directory that holds the per-year raw input files.
DATA_DIR=/opt/vreaudigital/data/mfinante
# Every log() line (and the final stats table) is appended here.
LOG=/var/log/vreaudigital-fin-import.log

#######################################
# Print a timestamped message to stdout and append it to $LOG.
# Globals:   LOG (read)
# Arguments: $1 - message text (printed verbatim)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and in $LOG
# Returns:   non-zero if the log file cannot be written (pipefail)
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG"
}
# --- Resolve database credentials -------------------------------------------
# The sourced file is expected to provide the INFISICAL_* variables used below.
source /opt/vreaudigital/.infisical-mi

# NOTE(review): the client secret is passed on the command line and may be
# visible in the process list; check whether the CLI can read it from the
# environment instead.
TOKEN=$(infisical login --method=universal-auth --domain="$INFISICAL_API_URL" --client-id="$INFISICAL_CLIENT_ID" --client-secret="$INFISICAL_CLIENT_SECRET" --silent --plain)
DATABASE_URL=$(infisical run --domain="$INFISICAL_API_URL" --projectId="$INFISICAL_PROJECT_ID" --env="$INFISICAL_ENV" --path="$INFISICAL_PATH" --silent --token="$TOKEN" -- sh -c 'echo "$DATABASE_URL"')

# With --silent a missing secret yields an empty string rather than an error;
# without this check psql would silently fall back to its default connection.
if [[ -z "$DATABASE_URL" ]]; then
  echo "[FATAL] DATABASE_URL could not be read from Infisical" >&2
  exit 1
fi

# Drop the query string (e.g. ?schema=...): psql is configured through PG*
# variables below and none of the query parameters are carried over.
DB=${DATABASE_URL%%\?*}

# scheme://user:password@host[:port]/database
# Both "postgresql://" and "postgres://" are accepted.
url_re='^postgres(ql)?://([^:]+):([^@]+)@([^:/]+)(:([0-9]+))?/(.+)$'
if [[ ! "$DB" =~ $url_re ]]; then
  # Never echo the URL itself: it contains the password.
  echo "[FATAL] DATABASE_URL is not of the form postgresql://user:pass@host[:port]/db" >&2
  exit 1
fi

export PGUSER="${BASH_REMATCH[2]}"
export PGPASSWORD="${BASH_REMATCH[3]}"
export PGHOST="${BASH_REMATCH[4]}"
export PGDATABASE="${BASH_REMATCH[7]}"
# Honour a non-default port; previously the port part of the URL was discarded
# and psql always connected to its default port.
if [[ -n "${BASH_REMATCH[6]:-}" ]]; then
  export PGPORT="${BASH_REMATCH[6]}"
fi
# NOTE(review): user/password are used as-is; if the URL carries
# percent-encoded characters they are not decoded here — confirm.

# Keep the raw secrets out of the environment for the rest of the run.
unset DATABASE_URL TOKEN DB url_re

log "=== Financial import started ==="
# WEB_UU and WEB_BL_BS_SL share the same 22-column schema (CUI, CAEN, I1..I20)
# so we can use the same staging table + INSERT for both. The `source` column
# tracks which raw category the row came from. WEB_BL_BS_SL covers special-
# regime entities (bilanț scurt, lichidare) that aren't in WEB_UU — e.g.
# Alliance Healthcare, in-liquidation companies. Together they fill most of
# the financial-data gap.

#######################################
# Load one raw file into firms.financials via firms.staging_financials.
# Steps: TRUNCATE staging → \copy the CSV into staging → INSERT ... ON CONFLICT
# into firms.financials, tagging rows with source "mfinante:<CATEGORY>".
# Globals:   none (uses log() and the PG* connection environment)
# Arguments: $1 - year (4 digits; spliced into the SQL as a literal)
#            $2 - category: WEB_UU | WEB_BL_BS_SL
#            $3 - path to the CSV file (comma-delimited, with header row)
# Returns:   0 on success or when the file is missing/empty (skipped);
#            non-zero on an invalid year or a failing psql step
#######################################
import_year_category() {
  local year="$1"
  local category="$2" # WEB_UU | WEB_BL_BS_SL
  local file="$3"
  local src_label="mfinante:${category}"

  # -s: the file must exist and be non-empty, otherwise there is nothing to do.
  if [ ! -s "$file" ]; then
    log "[$year/$category] [SKIP] $file missing"
    return 0
  fi

  # The year is interpolated into SQL below, so only a plain number is allowed.
  if [[ ! "$year" =~ ^[0-9]{4}$ ]]; then
    log "[$year/$category] [ERROR] year must be a 4-digit number"
    return 1
  fi

  log "[$year/$category] Truncating staging..."
  psql -v ON_ERROR_STOP=1 -c "TRUNCATE TABLE firms.staging_financials;" || return

  log "[$year/$category] COPY $file..."
  # Unquoted heredoc: the shell expands the file path and turns "\\" into the
  # single backslash psql needs for its client-side \copy meta-command.
  psql -v ON_ERROR_STOP=1 <<COPYEOF || return
\\copy firms.staging_financials (cui, caen, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17, i18, i19, i20) FROM '$file' WITH (FORMAT csv, DELIMITER ',', HEADER true, NULL '');
COPYEOF

  log "[$year/$category] UPSERT into financials (source=$src_label)..."
  psql -v ON_ERROR_STOP=1 <<SQL || return
INSERT INTO firms.financials (
  cui, year, caen,
  active_imobilizate, active_circulante, stocuri, creante, casa_banci,
  cheltuieli_avans, datorii, venituri_avans, provizioane,
  capitaluri_total, capital_subscris, patrimoniul_regiei,
  cifra_afaceri, venituri_total, cheltuieli_total,
  profit_brut, pierdere_bruta, profit_net, pierdere_neta,
  numar_salariati, source
)
-- NOTE(review): DISTINCT ON (cui) ordered by cui alone keeps an arbitrary row
-- when the file lists the same cui more than once.
SELECT DISTINCT ON (cui)
  cui, $year, caen,
  i1, i2, i3, i4, i5,
  i6, i7, i8, i9,
  i10, i11, i12,
  i13, i14, i15,
  i16, i17, i18, i19,
  -- Sanitize salariati: drop absurd values (data anomalies up to 7.7e14 observed)
  CASE WHEN i20 BETWEEN 0 AND 100000000 THEN i20::bigint ELSE NULL END,
  '$src_label'
FROM firms.staging_financials
WHERE cui IS NOT NULL AND cui != '' AND cui != '0'
ORDER BY cui
ON CONFLICT (cui, year) DO UPDATE SET
  -- Replace the whole row, not just its label: source and figures must always
  -- describe the same raw category.
  caen = EXCLUDED.caen,
  active_imobilizate = EXCLUDED.active_imobilizate,
  active_circulante = EXCLUDED.active_circulante,
  stocuri = EXCLUDED.stocuri,
  creante = EXCLUDED.creante,
  casa_banci = EXCLUDED.casa_banci,
  cheltuieli_avans = EXCLUDED.cheltuieli_avans,
  datorii = EXCLUDED.datorii,
  venituri_avans = EXCLUDED.venituri_avans,
  provizioane = EXCLUDED.provizioane,
  capitaluri_total = EXCLUDED.capitaluri_total,
  capital_subscris = EXCLUDED.capital_subscris,
  patrimoniul_regiei = EXCLUDED.patrimoniul_regiei,
  cifra_afaceri = EXCLUDED.cifra_afaceri,
  venituri_total = EXCLUDED.venituri_total,
  cheltuieli_total = EXCLUDED.cheltuieli_total,
  profit_brut = EXCLUDED.profit_brut,
  pierdere_bruta = EXCLUDED.pierdere_bruta,
  profit_net = EXCLUDED.profit_net,
  pierdere_neta = EXCLUDED.pierdere_neta,
  numar_salariati = EXCLUDED.numar_salariati,
  source = EXCLUDED.source
-- For (cui, year) duplicates across categories, prefer WEB_UU (more complete
-- schema for normal companies): an existing WEB_UU row is left untouched.
WHERE firms.financials.source IS DISTINCT FROM 'mfinante:WEB_UU';
SQL
}
# YEARS env var overrides the default daily-run list. Used by the historical
# backfill wrapper (import-financials-historical.sh). Default behaviour is
# unchanged for the cron job.
YEARS="${YEARS:-2020 2021 2022 2023 2024}"

# Split the list on whitespace into an array. Unlike an unquoted $YEARS in the
# for-loop, this does not subject the value to pathname expansion.
read -r -a year_list <<<"${YEARS//$'\n'/ }"

# WEB_UU is loaded before WEB_BL_BS_SL for each year: the upsert never
# overwrites an existing WEB_UU row, so WEB_UU wins when both list a company.
# The ${arr[@]+...} form keeps an empty list from tripping `set -u`.
for YEAR in ${year_list[@]+"${year_list[@]}"}; do
  import_year_category "$YEAR" "WEB_UU" "$DATA_DIR/web_uu_${YEAR}.txt"
  import_year_category "$YEAR" "WEB_BL_BS_SL" "$DATA_DIR/web_bl_bs_sl_${YEAR}.txt"
done
# Rebuild the materialized view now that firms.financials has been updated.
log "=== Refreshing latest-year MV ==="
psql -v ON_ERROR_STOP=1 -c "REFRESH MATERIALIZED VIEW firms.mv_financials_latest;"

# Per-year summary, written to both the console and the log file:
#   firms_with_data - rows for that year
#   avg_ca          - average cifra_afaceri, ignoring zero values
#   cu_ca           - rows with cifra_afaceri > 0
#   cu_salariati    - rows with numar_salariati > 0
log "=== Final stats ==="
psql -c "
SELECT year, COUNT(*) AS firms_with_data,
  ROUND(AVG(NULLIF(cifra_afaceri, 0))::numeric, 0) AS avg_ca,
  COUNT(*) FILTER (WHERE cifra_afaceri > 0) AS cu_ca,
  COUNT(*) FILTER (WHERE numar_salariati > 0) AS cu_salariati
FROM firms.financials
GROUP BY year ORDER BY year;
" 2>&1 | tee -a "$LOG"

log "=== Import done ==="
Reference in New Issue
Block a user