initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
Executable
+272
@@ -0,0 +1,272 @@
|
||||
#!/bin/bash
#
# Import the ONRC bulk CSV files into firms.entities.
# Source: data.gov.ro (CC-BY 4.0), updated weekly.
#
# Pipeline:
#   1. TRUNCATE staging tables
#   2. COPY each CSV ($DATA_DIR/*.csv) into its corresponding staging table
#   3. UPSERT into firms.entities, joining on cod_inmatriculare
#   4. Resolve siruta UAT for each firm via a county + localitate name match
#
# Idempotent. Run nightly via cron.
#
# Environment (optional overrides; the defaults are the production paths):
#   DATA_DIR  directory holding the od_*.csv files
#   LOG       file that log() appends to

set -euo pipefail

DATA_DIR=${DATA_DIR:-/opt/vreaudigital/data/onrc}
LOG=${LOG:-/var/log/vreaudigital-onrc-import.log}

# Print a timestamped message on stdout and append the same line to $LOG.
# Arguments: the message; several arguments are joined with single spaces.
# "$*" (rather than "$1") keeps a bare `log` call from aborting under `set -u`.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$LOG"
}
|
||||
|
||||
log "=== ONRC import started ==="

# ── Resolve DATABASE_URL via Infisical Machine Identity ──
# The sourced file must define the INFISICAL_* variables referenced below;
# under `set -u` a missing one aborts the script.
source /opt/vreaudigital/.infisical-mi

TOKEN=$(infisical login --method=universal-auth \
  --domain="$INFISICAL_API_URL" \
  --client-id="$INFISICAL_CLIENT_ID" \
  --client-secret="$INFISICAL_CLIENT_SECRET" \
  --silent --plain)

DATABASE_URL=$(infisical run --domain="$INFISICAL_API_URL" \
  --projectId="$INFISICAL_PROJECT_ID" \
  --env="$INFISICAL_ENV" --path="$INFISICAL_PATH" \
  --silent --token="$TOKEN" \
  -- sh -c 'echo "$DATABASE_URL"')

# Fail fast: with empty PG* variables psql would fall back to libpq defaults
# and the TRUNCATE steps later on could hit an unintended database.
if [[ -z "$DATABASE_URL" ]]; then
  log "FATAL: Infisical returned an empty DATABASE_URL"
  exit 1
fi

# Hand the credentials to psql through libpq environment variables instead of
# a URL argument, so the password never shows up in `ps aux`.
# Accepted shape: postgres[ql]://USER[:PASS]@HOST[:PORT]/DBNAME[?params]
# Query parameters (e.g. ?schema=...) are not part of any capture group and
# are therefore dropped. The URL itself is never logged.
# NOTE(review): percent-encoded characters in USER/PASS are passed through
# undecoded — confirm the stored URL does not rely on them.
pg_url_re='^postgres(ql)?://([^:@/]+)(:([^@]*))?@([^:/?]+)(:([0-9]+))?/([^?&]+)'
if [[ ! "$DATABASE_URL" =~ $pg_url_re ]]; then
  log "FATAL: DATABASE_URL is not of the form postgresql://USER[:PASS]@HOST[:PORT]/DBNAME"
  exit 1
fi

export PGUSER="${BASH_REMATCH[2]}"
export PGPASSWORD="${BASH_REMATCH[4]-}"
export PGHOST="${BASH_REMATCH[5]}"
# A URL without an explicit port uses the PostgreSQL default.
export PGPORT="${BASH_REMATCH[7]:-5432}"
export PGDATABASE="${BASH_REMATCH[8]}"
unset DATABASE_URL TOKEN pg_url_re
|
||||
|
||||
# ── Sanity check files ──
# Stop before touching the database unless every mandatory CSV is present
# and non-empty.
required_csvs=(
  od_firme.csv
  od_caen_autorizat.csv
  od_stare_firma.csv
  od_reprezentanti_legali.csv
)
for csv in "${required_csvs[@]}"; do
  [[ -s "$DATA_DIR/$csv" ]] && continue
  log "FATAL: $DATA_DIR/$csv missing or empty"
  exit 1
done

# Label this run with the name of the directory that really contains
# od_firme.csv (symlinks resolved), cut to 40 bytes.
dataset_dir=$(dirname "$(readlink -f "$DATA_DIR/od_firme.csv")")
DATASET_NAME=$(basename "$dataset_dir" | head -c 40)
log "Dataset name (best guess): $DATASET_NAME"
|
||||
|
||||
# ── Stage CSVs ──
# COPY options shared by every ONRC file: '^'-separated, header row, empty
# string means NULL, and backspace as the quote character.
copy_opts="WITH (FORMAT csv, DELIMITER '^', HEADER true, NULL '', QUOTE E'\b')"

# Load one CSV from $DATA_DIR into a staging table with a client-side \copy.
# Arguments: $1 - target table, $2 - comma-separated column list, $3 - CSV file name
stage_csv() {
  local table=$1 columns=$2 csv=$3
  psql -v ON_ERROR_STOP=1 <<COPYEOF
\\copy $table ($columns) FROM '$DATA_DIR/$csv' $copy_opts;
COPYEOF
}

log "Truncating staging tables..."
psql -v ON_ERROR_STOP=1 -c "
  TRUNCATE TABLE firms.staging_onrc_firme, firms.staging_onrc_caen,
                 firms.staging_onrc_stare, firms.staging_onrc_reprezentanti;
"

log "COPY od_firme.csv (683MB)..."
# Only this load is timed.
time stage_csv firms.staging_onrc_firme \
  "denumire, cui, cod_inmatriculare, data_inmatriculare, euid, forma_juridica, adr_tara, adr_judet, adr_localitate, adr_strada, adr_numar, adr_bloc, adr_scara, adr_etaj, adr_apartament, adr_cod_postal, adr_sector, adr_completare, web, tara_firma_mama" \
  od_firme.csv

log "COPY od_caen_autorizat.csv..."
stage_csv firms.staging_onrc_caen \
  "cod_inmatriculare, cod_caen, ver_caen" \
  od_caen_autorizat.csv

log "COPY od_stare_firma.csv..."
stage_csv firms.staging_onrc_stare \
  "cod_inmatriculare, cod_stare" \
  od_stare_firma.csv

log "COPY od_reprezentanti_legali.csv..."
stage_csv firms.staging_onrc_reprezentanti \
  "cod_inmatriculare, persoana, calitate, data_nastere, localitate_nastere, judet_nastere, tara_nastere, localitate, judet, tara" \
  od_reprezentanti_legali.csv
|
||||
|
||||
# Optional: extras from the same dataset (representatives of sole
# proprietorships, branches registered in other EU member states).
# Idempotent — each table is emptied and reloaded on every run.

# Reload one optional table from its CSV, or log a skip when the file is
# missing or empty.
# TRUNCATE and \copy share one transaction: with ON_ERROR_STOP a failed load
# makes psql exit before COMMIT, so the previous contents are kept rather
# than leaving the table empty.
# Arguments: $1 - CSV file name, $2 - size note for the log line,
#            $3 - target table, $4 - comma-separated column list
reload_optional_csv() {
  local csv=$1 size_note=$2 table=$3 columns=$4
  if [[ ! -s "$DATA_DIR/$csv" ]]; then
    log "[SKIP] $csv missing"
    return 0
  fi
  log "COPY $csv ($size_note)..."
  psql -v ON_ERROR_STOP=1 <<COPYEOF
BEGIN;
TRUNCATE TABLE $table;
\\copy $table ($columns) FROM '$DATA_DIR/$csv' WITH (FORMAT csv, DELIMITER '^', HEADER true, NULL '', QUOTE E'\\b');
COMMIT;
COPYEOF
}

reload_optional_csv od_reprezentanti_if.csv "~13MB" firms.reprezentanti_if \
  "cod_inmatriculare, nume, data_nastere, localitate_nastere, judet_nastere, tara_nastere, calitate"

reload_optional_csv od_sucursale_alte_state_membre.csv "small" firms.sucursale_ue \
  "cod_inmatriculare, tip_unitate, denumire_sucursala, euid, cod_fiscal_strain, tara"
|
||||
|
||||
# ── Aggregate into firms.entities ──
log "Building firms.entities from staging..."
# The heredoc delimiter is quoted, so the shell passes the SQL through
# untouched. The dataset name travels as a psql variable and is quoted by
# psql itself, so a quote character in the directory name cannot break or
# alter the statement.
time psql -v ON_ERROR_STOP=1 -v dataset_name="$DATASET_NAME" <<'SQL'
-- One stare row per cod_inmatriculare. This query has only cod_stare to order
-- by, so the highest code is kept as a deterministic pick.
-- NOTE(review): the stated intent was "pick latest"; confirm that the highest
-- code is an acceptable stand-in.
DROP TABLE IF EXISTS tmp_stare_agg;
CREATE TEMP TABLE tmp_stare_agg AS
SELECT DISTINCT ON (cod_inmatriculare) cod_inmatriculare, cod_stare
FROM firms.staging_onrc_stare
WHERE cod_inmatriculare IS NOT NULL
ORDER BY cod_inmatriculare, cod_stare DESC;

-- Sorted, de-duplicated array of CAEN codes per cod_inmatriculare
DROP TABLE IF EXISTS tmp_caen_agg;
CREATE TEMP TABLE tmp_caen_agg AS
SELECT
  cod_inmatriculare,
  array_agg(DISTINCT cod_caen ORDER BY cod_caen) FILTER (WHERE cod_caen IS NOT NULL) AS caens
FROM firms.staging_onrc_caen
WHERE cod_inmatriculare IS NOT NULL
GROUP BY cod_inmatriculare;

-- JSON array of legal representatives per cod_inmatriculare
DROP TABLE IF EXISTS tmp_rep_agg;
CREATE TEMP TABLE tmp_rep_agg AS
SELECT
  cod_inmatriculare,
  jsonb_agg(jsonb_build_object(
    'persoana', persoana,
    'calitate', calitate,
    'localitate', localitate,
    'judet', judet,
    'tara', tara
  )) AS rep_legali
FROM firms.staging_onrc_reprezentanti
WHERE cod_inmatriculare IS NOT NULL AND persoana IS NOT NULL
GROUP BY cod_inmatriculare;

-- UPSERT firms.entities, keyed on cui.
-- Rows with an empty or '0' CUI, or without a name, are skipped. When several
-- ONRC rows share a CUI, DISTINCT ON keeps the most recently registered one.
-- The ordering uses the parsed date, because the raw DD.MM.YYYY text would
-- sort by day of month first.
INSERT INTO firms.entities (
  cui, cod_inmatriculare, euid, name, forma_juridica,
  adr_tara, adr_judet, adr_localitate, adr_strada, adr_numar,
  adr_bloc, adr_scara, adr_etaj, adr_apartament, adr_cod_postal,
  adr_sector, adr_completare,
  adr_full,
  data_inmatriculare,
  registration_year,
  web,
  tara_firma_mama,
  caen_autorizate,
  rep_legali,
  status_text,
  is_radiated_onrc,
  source_onrc_dataset,
  onrc_fetched_at,
  updated_at
)
SELECT DISTINCT ON (f.cui)
  f.cui,
  f.cod_inmatriculare,
  f.euid,
  f.denumire,
  f.forma_juridica,
  f.adr_tara, f.adr_judet, f.adr_localitate, f.adr_strada, f.adr_numar,
  f.adr_bloc, f.adr_scara, f.adr_etaj, f.adr_apartament, f.adr_cod_postal,
  f.adr_sector, f.adr_completare,
  -- Address string for geocoding, built as "<street> nr.<no>, <locality>, <county>, Romania";
  -- concat_ws skips the NULL parts.
  NULLIF(trim(concat_ws(', ',
    NULLIF(trim(concat_ws(' ', f.adr_strada,
      CASE WHEN f.adr_numar IS NOT NULL THEN 'nr.' || f.adr_numar END
    )), ''),
    f.adr_localitate,
    f.adr_judet,
    'Romania'
  )), '') AS adr_full,
  d.reg_date AS data_inmatriculare,
  -- Year: taken from positions 7-10 when the value starts with DD.MM.YYYY
  -- (it may carry a suffix), otherwise from a trailing four-digit group.
  CASE WHEN f.data_inmatriculare ~ '^\d{2}\.\d{2}\.\d{4}'
         THEN substr(f.data_inmatriculare, 7, 4)::int
       WHEN f.data_inmatriculare ~ '\d{4}$'
         THEN right(f.data_inmatriculare, 4)::int
       ELSE NULL END AS registration_year,
  f.web,
  f.tara_firma_mama,
  ca.caens,
  ra.rep_legali,
  -- Status is stored as the raw stare code; decoding it through the ONRC
  -- nomenclator is still TODO.
  COALESCE(ss.cod_stare, 'unknown') AS status_text,
  -- TODO: import the ONRC stare nomenclator and detect radiated firms.
  -- Until then this flag is always false.
  false AS is_radiated_onrc,
  :'dataset_name' AS source_onrc_dataset,
  now() AS onrc_fetched_at,
  now() AS updated_at
FROM firms.staging_onrc_firme f
-- ONRC date format is DD.MM.YYYY. It is parsed once here and reused for the
-- stored column and for the DISTINCT ON ordering.
CROSS JOIN LATERAL (
  SELECT CASE WHEN f.data_inmatriculare ~ '^\d{2}\.\d{2}\.\d{4}'
              THEN to_date(f.data_inmatriculare, 'DD.MM.YYYY')
         END AS reg_date
) d
LEFT JOIN tmp_caen_agg ca ON ca.cod_inmatriculare = f.cod_inmatriculare
LEFT JOIN tmp_rep_agg ra ON ra.cod_inmatriculare = f.cod_inmatriculare
LEFT JOIN tmp_stare_agg ss ON ss.cod_inmatriculare = f.cod_inmatriculare
WHERE f.cui IS NOT NULL
  AND f.cui != ''
  AND f.cui != '0'
  AND f.denumire IS NOT NULL
ORDER BY f.cui, d.reg_date DESC NULLS LAST, f.cod_inmatriculare
ON CONFLICT (cui) DO UPDATE SET
  cod_inmatriculare = EXCLUDED.cod_inmatriculare,
  euid = EXCLUDED.euid,
  name = EXCLUDED.name,
  forma_juridica = EXCLUDED.forma_juridica,
  adr_tara = EXCLUDED.adr_tara,
  adr_judet = EXCLUDED.adr_judet,
  adr_localitate = EXCLUDED.adr_localitate,
  adr_strada = EXCLUDED.adr_strada,
  adr_numar = EXCLUDED.adr_numar,
  adr_bloc = EXCLUDED.adr_bloc,
  adr_scara = EXCLUDED.adr_scara,
  adr_etaj = EXCLUDED.adr_etaj,
  adr_apartament = EXCLUDED.adr_apartament,
  adr_cod_postal = EXCLUDED.adr_cod_postal,
  adr_sector = EXCLUDED.adr_sector,
  adr_completare = EXCLUDED.adr_completare,
  adr_full = EXCLUDED.adr_full,
  data_inmatriculare = EXCLUDED.data_inmatriculare,
  registration_year = EXCLUDED.registration_year,
  web = EXCLUDED.web,
  tara_firma_mama = EXCLUDED.tara_firma_mama,
  caen_autorizate = EXCLUDED.caen_autorizate,
  rep_legali = EXCLUDED.rep_legali,
  status_text = EXCLUDED.status_text,
  is_radiated_onrc = EXCLUDED.is_radiated_onrc,
  source_onrc_dataset = EXCLUDED.source_onrc_dataset,
  onrc_fetched_at = EXCLUDED.onrc_fetched_at,
  updated_at = now();

-- Fill in siruta for firms that have none, by matching normalized county and
-- locality names against GisUat. If several UATs match, the lowest siruta wins.
UPDATE firms.entities f
SET siruta = sub.siruta
FROM (
  SELECT DISTINCT ON (e.cui) e.cui, gu.siruta
  FROM firms.entities e
  JOIN public."GisUat" gu
    ON seap.norm_uat_name(gu.county) = seap.norm_uat_name(e.adr_judet)
   AND seap.norm_uat_name(gu.name) = seap.norm_uat_name(e.adr_localitate)
  WHERE e.siruta IS NULL
    AND e.adr_judet IS NOT NULL
    AND e.adr_localitate IS NOT NULL
  ORDER BY e.cui, gu.siruta
) sub
WHERE f.cui = sub.cui;
SQL
|
||||
|
||||
# ── Stats ──
# Summary counts over firms.entities; psql's stdout and stderr are both shown
# and appended to the log file.
log "Final stats:"
stats_sql="
SELECT
COUNT(*) AS total_firms,
COUNT(*) FILTER (WHERE siruta IS NOT NULL) AS cu_siruta,
COUNT(*) FILTER (WHERE rep_legali IS NOT NULL) AS cu_admins,
COUNT(*) FILTER (WHERE caen_autorizate IS NOT NULL) AS cu_caen,
COUNT(*) FILTER (WHERE is_radiated_onrc = true) AS radiate
FROM firms.entities;
"
psql -c "$stats_sql" 2>&1 | tee -a "$LOG"

log "=== ONRC import complete ==="
|
||||
Reference in New Issue
Block a user