Files
vreau-digital/services/seap-scraper/sql/012_firms.sql
T
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

162 lines
7.0 KiB
PL/PgSQL

-- Firms registry — extends seap.cui_location with full ONRC + ANAF data
-- for ALL Romanian companies (~1.5M), not just those active in SEAP.
--
-- Sources:
-- ONRC bulk on data.gov.ro (CC-BY 4.0): COD_INMATRICULARE-keyed CSV files
-- ANAF webservicesp v9: per-CUI enrichment (status, address, contacts)
-- Photon (Komoot) self-hosted: address → lat/lng geocoding
--
-- Idempotent: safe to re-run.
BEGIN;
CREATE SCHEMA IF NOT EXISTS firms;
-- ──────────────────────────────────────────────────────────────────
-- Master firms table — one row per CUI (unique)
-- ──────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS firms.entities (
cui TEXT PRIMARY KEY,
cod_inmatriculare TEXT, -- e.g. J40/630/1992 — ONRC primary key, NULL for PFAs without CUI
euid TEXT, -- European identifier
name TEXT NOT NULL,
forma_juridica TEXT, -- SRL, SA, PFA, II, IF, etc.
-- ── Address (parsed from ONRC) ──
adr_tara TEXT,
adr_judet TEXT,
adr_localitate TEXT,
adr_strada TEXT,
adr_numar TEXT,
adr_bloc TEXT,
adr_scara TEXT,
adr_etaj TEXT,
adr_apartament TEXT,
adr_cod_postal TEXT,
adr_sector TEXT,
adr_completare TEXT, -- raw appendix
adr_full TEXT, -- concatenated, used for geocoding query
siruta TEXT, -- matched UAT siruta (joined with GisUat)
-- ── Geolocation ──
lat DOUBLE PRECISION,
lng DOUBLE PRECISION,
geom GEOGRAPHY(POINT, 4326),
geocode_source TEXT, -- 'photon', 'nominatim', 'siruta_centroid', 'manual'
geocode_score REAL, -- 0..1 confidence
-- ── Registration ──
data_inmatriculare DATE,
registration_year INT,
-- ── Status (from ANAF v9 + ONRC stare_firma) ──
is_active_anaf BOOLEAN, -- NULL=unknown, true=active, false=inactive (lista contribuabili inactivi)
is_radiated_onrc BOOLEAN, -- ONRC stare_firma RADIATA
is_vat_registered BOOLEAN, -- ANAF scpTVA active
is_efactura BOOLEAN, -- ANAF statusRO_e_Factura
status_text TEXT, -- decoded human-readable: "Activă", "Radiată", "Insolvență", etc.
-- ── Contact (best-effort, often NULL) ──
phone TEXT,
fax TEXT,
web TEXT, -- from ONRC OD_FIRME.CSV.WEB column
-- ── Activity classification ──
caen_principal TEXT, -- CAEN cod from ANAF
caen_autorizate TEXT[], -- multi-row aggregate from OD_CAEN_AUTORIZAT.CSV
-- ── Foreign parent ──
tara_firma_mama TEXT, -- from ONRC OD_FIRME.CSV.TARA_FIRMA_MAMA
-- ── Ownership / management (from ONRC reprezentanti) ──
rep_legali JSONB, -- [{persoana, calitate, judet_localitate, tara}, ...]
-- ── Metadata ──
source_onrc_dataset TEXT, -- e.g. 'firme-03-04-2026'
anaf_fetched_at TIMESTAMPTZ,
onrc_fetched_at TIMESTAMPTZ,
geocoded_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_firms_cod_inmatriculare ON firms.entities(cod_inmatriculare) WHERE cod_inmatriculare IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_firms_county ON firms.entities(adr_judet);
CREATE INDEX IF NOT EXISTS idx_firms_siruta ON firms.entities(siruta) WHERE siruta IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_firms_caen_principal ON firms.entities(caen_principal);
CREATE INDEX IF NOT EXISTS idx_firms_geom ON firms.entities USING gist(geom);
CREATE INDEX IF NOT EXISTS idx_firms_name_trgm ON firms.entities USING gin(name gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_firms_active ON firms.entities(is_active_anaf, is_radiated_onrc) WHERE is_active_anaf = true AND (is_radiated_onrc = false OR is_radiated_onrc IS NULL);
-- ──────────────────────────────────────────────────────────────────
-- Staging tables for raw ONRC CSV imports (truncated each refresh)
-- ──────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS firms.staging_onrc_firme (
denumire TEXT,
cui TEXT,
cod_inmatriculare TEXT,
data_inmatriculare TEXT, -- YYYY-MM-DD or empty
euid TEXT,
forma_juridica TEXT,
adr_tara TEXT,
adr_judet TEXT,
adr_localitate TEXT,
adr_strada TEXT,
adr_numar TEXT,
adr_bloc TEXT,
adr_scara TEXT,
adr_etaj TEXT,
adr_apartament TEXT,
adr_cod_postal TEXT,
adr_sector TEXT,
adr_completare TEXT,
web TEXT,
tara_firma_mama TEXT
);
CREATE TABLE IF NOT EXISTS firms.staging_onrc_caen (
cod_inmatriculare TEXT,
cod_caen TEXT,
ver_caen TEXT
);
CREATE TABLE IF NOT EXISTS firms.staging_onrc_stare (
cod_inmatriculare TEXT,
cod_stare TEXT
);
CREATE TABLE IF NOT EXISTS firms.staging_onrc_reprezentanti (
cod_inmatriculare TEXT,
persoana TEXT,
calitate TEXT,
data_nastere TEXT,
localitate_nastere TEXT,
judet_nastere TEXT,
tara_nastere TEXT,
localitate TEXT,
judet TEXT,
tara TEXT
);
-- ──────────────────────────────────────────────────────────────────
-- Stare firma codelist (manually populated — short list ~10 codes)
-- ──────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS firms.stare_codelist (
cod TEXT PRIMARY KEY,
label TEXT NOT NULL
);
INSERT INTO firms.stare_codelist (cod, label) VALUES
('1', 'Activă'),
('2', 'Suspendată activitate'),
('3', 'Dizolvare'),
('4', 'Radiată'),
('5', 'În lichidare'),
('6', 'Insolvență'),
('7', 'Reorganizare judiciară'),
('8', 'Faliment'),
('9', 'Întreruptă activitate')
ON CONFLICT (cod) DO NOTHING;
COMMIT;