-- 028_anre.sql -- ANRE — Autoritatea Națională de Reglementare în domeniul Energiei. -- Public license/authorization registries scraped from portal.anre.ro/PublicLists. -- -- Sources (all return JSON via Kendo Grid AJAX endpoint, pageSize=99999 returns full): -- 1. /PublicLists/LicenteAutorizatii → ~4,927 licenses (electricitate) -- flat columns: Societate, Sediu, Localitate, Judet, NrLicenta, DataEmitere, -- DataExpirare, Stare, TipAL, TipActivitate, Comentariu -- 2. /PublicLists/LicenteAutorizatiiGN → ~353 licenses (gaze naturale) -- parent row per company, "Detaliu" is HTML with multiple sub-rows -- (Nr.Document, Tip document, Tip activitate, Localitate, Data emitere, -- Data expirare, Stare, Decizie) -- 3. /PublicLists/Atestate → ~9,745 atestate -- parent row per company, "Detaliu" HTML
w/ Nr.atestat, Tip tarif, -- Data emitere, Data expirare, Stare -- 4. /PublicLists/AutorizatiiElectricieniAutorizati → ~101,529 electricieni autorizati -- flat: NumePrenume, NrRegistru, Localitate, Judet, NrAutorizare, -- TarifAutorizare, TipAutorizare, DataExpirare, Stare -- -- Cross-source value: anre.licente.titular_cui (resolved via firms.normalize_company_name -- fuzzy match) × seap.announcements.supplier_cui = "energy operators with state contracts". -- Red-flag: company wins energy-related SEAP contract but has no ANRE license. CREATE SCHEMA IF NOT EXISTS anre; -- ── 1. Licente & autorizatii (companies) — unified flat ──────────────────── -- One row per distinct license document. license_source distinguishes the -- 3 corporate sources (electricitate / gaze / atestate). Detaliu sub-rows -- from GN/atestate are flattened to one row per sub-row. Source position -- (NrCrt) is preserved in raw_json for traceability. CREATE TABLE IF NOT EXISTS anre.licente ( id char(40) PRIMARY KEY, -- sha1(license_source|license_no|titular_name|data_emitere|tip_al) license_source text NOT NULL, -- 'electricitate' | 'gaze' | 'atestat' license_no text NOT NULL, -- "NrLicenta" / "Nr. Document" / "Nr. atestat" license_type text, -- "Licenta" / "Autorizatie de Infiintare" / "Confirmare Licenta" / "Atestat" license_subtype text, -- "TipActivitate" / "Tip document" / "Tip tarif" (e.g. "Producere", "Furnizare", "Tarif A1") titular_name text NOT NULL, -- raw "Societate" titular_name_norm text, -- firms.normalize_company_name(titular_name) — populated post-insert titular_cui text, -- resolved via fuzzy match (NULL initially) cui_match_score numeric(4,3), cui_match_method text, -- 'exact_norm' / 'trgm_unique' / 'trgm_judet' matched_at timestamptz, sediu text, -- adresa localitate text, judet text, telefon_fax text, data_emitere date, data_expirare date, stare text, -- 'Acordata' / 'Expirata' / 'Retrasa' / 'Suspendata' / 'Incetat valabilitate, sub 1 MW' / etc. decizie text, -- "Nr.Dec. 2223" — only GN comentariu text, -- electricitate only raw_json jsonb, fetched_at timestamptz NOT NULL DEFAULT now() ); CREATE INDEX IF NOT EXISTS idx_anre_licente_titular_cui ON anre.licente(titular_cui) WHERE titular_cui IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_anre_licente_titular_norm_trgm ON anre.licente USING gin (titular_name_norm gin_trgm_ops); CREATE INDEX IF NOT EXISTS idx_anre_licente_source_stare ON anre.licente(license_source, stare); CREATE INDEX IF NOT EXISTS idx_anre_licente_data_expirare ON anre.licente(data_expirare); CREATE INDEX IF NOT EXISTS idx_anre_licente_subtype ON anre.licente(license_subtype); COMMENT ON TABLE anre.licente IS 'ANRE licenses & authorizations (electricitate + gaze + atestate). One row per distinct license document. Source: portal.anre.ro/PublicLists/{LicenteAutorizatii,LicenteAutorizatiiGN,Atestate}.'; COMMENT ON COLUMN anre.licente.id IS 'sha1(license_source|license_no|titular_name|data_emitere|license_type) — idempotent upsert key.'; COMMENT ON COLUMN anre.licente.license_source IS '"electricitate" / "gaze" / "atestat" — source registry.'; -- ── 2. Electricieni autorizati (individuals) ─────────────────────────────── -- People, not firms. No CUI; we keep just to enable lookups by name. CREATE TABLE IF NOT EXISTS anre.electricieni ( id bigserial PRIMARY KEY, nume_prenume text NOT NULL, nr_registru integer, -- "NrRegistru" nr_autorizare integer, -- "NrAutorizare" — natural unique key tip_autorizare text, -- "Autorizare Electricieni" tarif text, -- "Tarif II B" / "Tarif IV" localitate text, judet text, telefon_fax text, data_expirare date, stare text, -- "Activ" / "Expirat" / "Retras" raw_json jsonb, fetched_at timestamptz NOT NULL DEFAULT now(), UNIQUE (nr_autorizare, nume_prenume) ); CREATE INDEX IF NOT EXISTS idx_anre_electricieni_judet ON anre.electricieni(judet, stare); CREATE INDEX IF NOT EXISTS idx_anre_electricieni_nume_trgm ON anre.electricieni USING gin (nume_prenume gin_trgm_ops); COMMENT ON TABLE anre.electricieni IS 'ANRE — electricieni autorizati (persoane fizice). Source: portal.anre.ro/PublicLists/AutorizatiiElectricieniAutorizati.'; -- ── 3. Scrape log (mirrors aep.scrape_log convention) ────────────────────── CREATE TABLE IF NOT EXISTS anre.scrape_log ( id bigserial PRIMARY KEY, scraper text NOT NULL, -- 'electricitate' / 'gaze' / 'atestat' / 'electricieni' source_url text NOT NULL, rows_seen integer NOT NULL DEFAULT 0, rows_inserted integer NOT NULL DEFAULT 0, rows_updated integer NOT NULL DEFAULT 0, rows_skipped integer NOT NULL DEFAULT 0, duration_ms integer NOT NULL DEFAULT 0, started_at timestamptz NOT NULL, finished_at timestamptz NOT NULL DEFAULT now(), error text ); CREATE INDEX IF NOT EXISTS idx_anre_scrape_log_started ON anre.scrape_log(started_at DESC); -- ── 4. Materialized view: per-CUI license rollup ─────────────────────────── -- Joinable with seap.announcements.supplier_cui to detect licensed-vs-unlicensed -- energy contractors. CREATE MATERIALIZED VIEW IF NOT EXISTS anre.mv_licente_per_cui AS SELECT titular_cui AS cui, COUNT(*) AS nr_licente_total, COUNT(*) FILTER (WHERE license_source = 'electricitate') AS nr_electricitate, COUNT(*) FILTER (WHERE license_source = 'gaze') AS nr_gaze, COUNT(*) FILTER (WHERE license_source = 'atestat') AS nr_atestate, COUNT(*) FILTER (WHERE stare ILIKE 'Acord%' OR stare ILIKE 'Activ%') AS nr_active, COUNT(*) FILTER (WHERE stare ILIKE 'Expir%') AS nr_expirate, COUNT(*) FILTER (WHERE stare ILIKE 'Retras%' OR stare ILIKE 'Suspend%') AS nr_retrase, array_agg(DISTINCT license_subtype) FILTER (WHERE license_subtype IS NOT NULL) AS subtipuri, array_agg(DISTINCT license_source) AS surse, MIN(data_emitere) AS prima_emitere, MAX(data_emitere) AS ultima_emitere, MAX(data_expirare) AS ultima_expirare FROM anre.licente WHERE titular_cui IS NOT NULL GROUP BY titular_cui; CREATE UNIQUE INDEX IF NOT EXISTS idx_anre_mv_licente_per_cui ON anre.mv_licente_per_cui(cui); COMMENT ON MATERIALIZED VIEW anre.mv_licente_per_cui IS 'Rollup of ANRE licenses per CUI. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY anre.mv_licente_per_cui';