a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
109 lines
4.4 KiB
Bash
Executable File
109 lines
4.4 KiB
Bash
Executable File
#!/bin/bash
#
# Per-year import of financial indicators ("Situații financiare", i.e.
# financial statements) published on data.gov.ro.
#
# For every year, each raw file (web_uu_YYYY.txt, web_bl_bs_sl_YYYY.txt) is
# COPY-ed into firms.staging_financials and from there written to
# firms.financials, whose primary key is (cui, year).

# Strict mode: abort on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Directory containing the raw input files.
DATA_DIR='/opt/vreaudigital/data/mfinante'
# Log file that log() appends to.
LOG='/var/log/vreaudigital-fin-import.log'
|
|
# Write "[YYYY-MM-DD HH:MM:SS] <message>" to stdout and append the same line
# to the file named by $LOG.
#   $1 - message text (only the first argument is used)
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG"
}
|
|
|
|
# --- Database credentials ----------------------------------------------------
# Infisical machine-identity settings. The INFISICAL_* variables used below are
# presumably defined by this file (it is not visible from here).
source /opt/vreaudigital/.infisical-mi

# Log in with universal auth, then have Infisical run a tiny shell that prints
# the DATABASE_URL secret so it can be captured here.
TOKEN=$(infisical login --method=universal-auth \
  --domain="$INFISICAL_API_URL" \
  --client-id="$INFISICAL_CLIENT_ID" \
  --client-secret="$INFISICAL_CLIENT_SECRET" \
  --silent --plain)
DATABASE_URL=$(infisical run \
  --domain="$INFISICAL_API_URL" \
  --projectId="$INFISICAL_PROJECT_ID" \
  --env="$INFISICAL_ENV" \
  --path="$INFISICAL_PATH" \
  --silent --token="$TOKEN" \
  -- sh -c 'echo "$DATABASE_URL"')

# Split the URL into libpq environment variables so that no psql call needs the
# password on its command line. Expected shape:
#   postgresql://USER:PASSWORD@HOST[:PORT]/DBNAME[?params]
# Everything from '?' on (e.g. a schema= parameter, which libpq does not
# accept) is ignored because only the four/five parts below are used.
# NOTE(review): percent-encoded characters in the password are passed through
# undecoded, as before - confirm the stored secret needs no decoding.
DB_URL_RE='^postgres(ql)?://([^:@/]+):([^@]+)@([^:/?]+)(:([0-9]+))?/([^?]+)'
if [[ ! "$DATABASE_URL" =~ $DB_URL_RE ]]; then
  # Never echo the URL itself here: it contains the password.
  log "[ERROR] DATABASE_URL is empty or not of the form postgresql://user:password@host[:port]/dbname"
  exit 1
fi

export PGUSER="${BASH_REMATCH[2]}"
export PGPASSWORD="${BASH_REMATCH[3]}"
export PGHOST="${BASH_REMATCH[4]}"
export PGDATABASE="${BASH_REMATCH[7]}"
# Honour an explicit port; without one psql keeps using its default (or a
# PGPORT already present in the environment).
if [[ -n "${BASH_REMATCH[6]:-}" ]]; then
  export PGPORT="${BASH_REMATCH[6]}"
fi

# Drop the raw secrets from the shell once the PG* variables are in place.
unset DATABASE_URL TOKEN DB_URL_RE

log "=== Financial import started ==="
|
|
|
|
#######################################
# import_year_category YEAR CATEGORY FILE
#
# Load one raw mfinante file into firms.financials via the staging table.
#
# WEB_UU and WEB_BL_BS_SL share the same 22-column layout (CUI, CAEN, I1..I20),
# so one staging table and one INSERT serve both; the `source` column records
# which raw category a row came from. WEB_BL_BS_SL covers special-regime
# entities (short-form balance sheet, liquidation) that are not in WEB_UU,
# e.g. Alliance Healthcare or companies in liquidation. Together the two
# categories fill most of the financial-data gap.
#
# Arguments:
#   $1 - reporting year, exactly four digits (it is interpolated into SQL)
#   $2 - raw category, WEB_UU | WEB_BL_BS_SL (letters, digits, underscore)
#   $3 - path of the comma-separated input file with a header row
# Uses:    log() and psql (connection taken from the PG* environment)
# Returns: 0 after a successful import, or when FILE is missing/empty (skip);
#          1 for an invalid YEAR or CATEGORY; psql's status if a step fails.
#######################################
import_year_category() {
  local year="$1"
  local category="$2" # WEB_UU | WEB_BL_BS_SL
  local file="$3"
  local src_label="mfinante:${category}"

  # Both values end up inside SQL text below, and YEAR ultimately comes from
  # the YEARS environment variable, so reject anything but plain tokens.
  if [[ ! "$year" =~ ^[0-9]{4}$ || ! "$category" =~ ^[A-Za-z0-9_]+$ ]]; then
    log "[$year/$category] [ERROR] invalid year or category, nothing imported"
    return 1
  fi

  # -s is false for a missing file and for an empty one.
  if [[ ! -s "$file" ]]; then
    log "[$year/$category] [SKIP] $file missing or empty"
    return 0
  fi

  log "[$year/$category] Truncating staging..."
  psql -v ON_ERROR_STOP=1 -c "TRUNCATE TABLE firms.staging_financials;" || return

  # \copy is a psql meta-command and must stay on a single line. The doubled
  # backslash is needed because the here-document is unquoted.
  log "[$year/$category] COPY $file..."
  psql -v ON_ERROR_STOP=1 <<COPYEOF || return
\\copy firms.staging_financials (cui, caen, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17, i18, i19, i20) FROM '$file' WITH (FORMAT csv, DELIMITER ',', HEADER true, NULL '');
COPYEOF

  log "[$year/$category] UPSERT into financials (source=$src_label)..."
  psql -v ON_ERROR_STOP=1 <<SQL || return
INSERT INTO firms.financials (
  cui, year, caen,
  active_imobilizate, active_circulante, stocuri, creante, casa_banci,
  cheltuieli_avans, datorii, venituri_avans, provizioane,
  capitaluri_total, capital_subscris, patrimoniul_regiei,
  cifra_afaceri, venituri_total, cheltuieli_total,
  profit_brut, pierdere_bruta, profit_net, pierdere_neta,
  numar_salariati, source
)
SELECT DISTINCT ON (cui)
  cui, $year, caen,
  i1, i2, i3, i4, i5,
  i6, i7, i8, i9,
  i10, i11, i12,
  i13, i14, i15,
  i16, i17, i18, i19,
  -- Sanitize salariati: drop absurd values (data anomalies up to 7.7e14 observed)
  CASE WHEN i20 BETWEEN 0 AND 100000000 THEN i20::bigint ELSE NULL END,
  '$src_label'
FROM firms.staging_financials
WHERE cui IS NOT NULL AND cui != '' AND cui != '0'
ORDER BY cui
ON CONFLICT (cui, year) DO UPDATE SET
  -- Replace the whole row, so that the source label always describes the
  -- figures actually stored (previously only source and caen were replaced).
  caen               = EXCLUDED.caen,
  active_imobilizate = EXCLUDED.active_imobilizate,
  active_circulante  = EXCLUDED.active_circulante,
  stocuri            = EXCLUDED.stocuri,
  creante            = EXCLUDED.creante,
  casa_banci         = EXCLUDED.casa_banci,
  cheltuieli_avans   = EXCLUDED.cheltuieli_avans,
  datorii            = EXCLUDED.datorii,
  venituri_avans     = EXCLUDED.venituri_avans,
  provizioane        = EXCLUDED.provizioane,
  capitaluri_total   = EXCLUDED.capitaluri_total,
  capital_subscris   = EXCLUDED.capital_subscris,
  patrimoniul_regiei = EXCLUDED.patrimoniul_regiei,
  cifra_afaceri      = EXCLUDED.cifra_afaceri,
  venituri_total     = EXCLUDED.venituri_total,
  cheltuieli_total   = EXCLUDED.cheltuieli_total,
  profit_brut        = EXCLUDED.profit_brut,
  pierdere_bruta     = EXCLUDED.pierdere_bruta,
  profit_net         = EXCLUDED.profit_net,
  pierdere_neta      = EXCLUDED.pierdere_neta,
  numar_salariati    = EXCLUDED.numar_salariati,
  source             = EXCLUDED.source
-- For (cui, year) duplicates across categories WEB_UU wins (more complete
-- schema for normal companies): a row already labelled WEB_UU is left
-- untouched, exactly as before. Rows with any other or a NULL source are
-- overwritten by the incoming row.
WHERE firms.financials.source IS DISTINCT FROM 'mfinante:WEB_UU';
SQL
}
|
|
|
|
# The list of years can be supplied through the YEARS environment variable;
# the historical backfill wrapper (import-financials-historical.sh) does so.
# When YEARS is unset or empty the cron job's usual list below is used.
YEARS="${YEARS:-2020 2021 2022 2023 2024}"

# $YEARS is left unquoted on purpose: it is a whitespace-separated list.
for YEAR in $YEARS; do
  # For each year WEB_UU is imported first, then WEB_BL_BS_SL.
  for category in WEB_UU WEB_BL_BS_SL; do
    case "$category" in
      WEB_UU) prefix=web_uu ;;
      WEB_BL_BS_SL) prefix=web_bl_bs_sl ;;
    esac
    import_year_category "$YEAR" "$category" "$DATA_DIR/${prefix}_${YEAR}.txt"
  done
done
|
|
|
|
# Rebuild firms.mv_financials_latest now that firms.financials has changed
# (presumably the latest-year-per-firm view, judging by its name).
log "=== Refreshing latest-year MV ==="
psql -v ON_ERROR_STOP=1 \
  -c "REFRESH MATERIALIZED VIEW firms.mv_financials_latest;"

# Per-year summary: row count, average non-zero turnover, and how many rows
# have a positive turnover / a positive head count. Output (stderr included)
# goes to the terminal and is appended to $LOG.
log "=== Final stats ==="
stats_sql="
SELECT year, COUNT(*) AS firms_with_data,
ROUND(AVG(NULLIF(cifra_afaceri, 0))::numeric, 0) AS avg_ca,
COUNT(*) FILTER (WHERE cifra_afaceri > 0) AS cu_ca,
COUNT(*) FILTER (WHERE numar_salariati > 0) AS cu_salariati
FROM firms.financials
GROUP BY year ORDER BY year;
"
psql -c "$stats_sql" 2>&1 | tee -a "$LOG"

log "=== Import done ==="
|