-- 028_anre.sql
-- ANRE — Autoritatea Națională de Reglementare în domeniul Energiei.
-- Public license/authorization registries scraped from portal.anre.ro/PublicLists.
--
-- Sources (all return JSON via Kendo Grid AJAX endpoint, pageSize=99999 returns full):
-- 1. /PublicLists/LicenteAutorizatii → ~4,927 licenses (electricitate)
-- flat columns: Societate, Sediu, Localitate, Judet, NrLicenta, DataEmitere,
-- DataExpirare, Stare, TipAL, TipActivitate, Comentariu
-- 2. /PublicLists/LicenteAutorizatiiGN → ~353 licenses (gaze naturale)
-- parent row per company, "Detaliu" is HTML
with multiple sub-rows
-- (Nr.Document, Tip document, Tip activitate, Localitate, Data emitere,
-- Data expirare, Stare, Decizie)
-- 3. /PublicLists/Atestate → ~9,745 atestate
-- parent row per company, "Detaliu" HTML w/ Nr.atestat, Tip tarif,
-- Data emitere, Data expirare, Stare
-- 4. /PublicLists/AutorizatiiElectricieniAutorizati → ~101,529 electricieni autorizati
-- flat: NumePrenume, NrRegistru, Localitate, Judet, NrAutorizare,
-- TarifAutorizare, TipAutorizare, DataExpirare, Stare
--
-- Cross-source value: anre.licente.titular_cui (resolved via firms.normalize_company_name
-- fuzzy match) × seap.announcements.supplier_cui = "energy operators with state contracts".
-- Red-flag: company wins energy-related SEAP contract but has no ANRE license.
CREATE SCHEMA IF NOT EXISTS anre;
-- ── 1. Licente & autorizatii (companies) — unified flat ────────────────────
-- One row per distinct license document. license_source distinguishes the
-- 3 corporate sources (electricitate / gaze / atestate). Detaliu sub-rows
-- from GN/atestate are flattened to one row per sub-row. Source position
-- (NrCrt) is preserved in raw_json for traceability.
CREATE TABLE IF NOT EXISTS anre.licente (
id char(40) PRIMARY KEY, -- sha1(license_source|license_no|titular_name|data_emitere|tip_al)
license_source text NOT NULL, -- 'electricitate' | 'gaze' | 'atestat'
license_no text NOT NULL, -- "NrLicenta" / "Nr. Document" / "Nr. atestat"
license_type text, -- "Licenta" / "Autorizatie de Infiintare" / "Confirmare Licenta" / "Atestat"
license_subtype text, -- "TipActivitate" / "Tip document" / "Tip tarif" (e.g. "Producere", "Furnizare", "Tarif A1")
titular_name text NOT NULL, -- raw "Societate"
titular_name_norm text, -- firms.normalize_company_name(titular_name) — populated post-insert
titular_cui text, -- resolved via fuzzy match (NULL initially)
cui_match_score numeric(4,3),
cui_match_method text, -- 'exact_norm' / 'trgm_unique' / 'trgm_judet'
matched_at timestamptz,
sediu text, -- adresa
localitate text,
judet text,
telefon_fax text,
data_emitere date,
data_expirare date,
stare text, -- 'Acordata' / 'Expirata' / 'Retrasa' / 'Suspendata' / 'Incetat valabilitate, sub 1 MW' / etc.
decizie text, -- "Nr.Dec. 2223" — only GN
comentariu text, -- electricitate only
raw_json jsonb,
fetched_at timestamptz NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_anre_licente_titular_cui ON anre.licente(titular_cui) WHERE titular_cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_anre_licente_titular_norm_trgm ON anre.licente USING gin (titular_name_norm gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_anre_licente_source_stare ON anre.licente(license_source, stare);
CREATE INDEX IF NOT EXISTS idx_anre_licente_data_expirare ON anre.licente(data_expirare);
CREATE INDEX IF NOT EXISTS idx_anre_licente_subtype ON anre.licente(license_subtype);
COMMENT ON TABLE anre.licente IS
'ANRE licenses & authorizations (electricitate + gaze + atestate). One row per distinct license document. Source: portal.anre.ro/PublicLists/{LicenteAutorizatii,LicenteAutorizatiiGN,Atestate}.';
COMMENT ON COLUMN anre.licente.id IS
'sha1(license_source|license_no|titular_name|data_emitere|license_type) — idempotent upsert key.';
COMMENT ON COLUMN anre.licente.license_source IS
'"electricitate" / "gaze" / "atestat" — source registry.';
-- ── 2. Electricieni autorizati (individuals) ───────────────────────────────
-- People, not firms. No CUI; we keep just to enable lookups by name.
CREATE TABLE IF NOT EXISTS anre.electricieni (
id bigserial PRIMARY KEY,
nume_prenume text NOT NULL,
nr_registru integer, -- "NrRegistru"
nr_autorizare integer, -- "NrAutorizare" — natural unique key
tip_autorizare text, -- "Autorizare Electricieni"
tarif text, -- "Tarif II B" / "Tarif IV"
localitate text,
judet text,
telefon_fax text,
data_expirare date,
stare text, -- "Activ" / "Expirat" / "Retras"
raw_json jsonb,
fetched_at timestamptz NOT NULL DEFAULT now(),
UNIQUE (nr_autorizare, nume_prenume)
);
CREATE INDEX IF NOT EXISTS idx_anre_electricieni_judet ON anre.electricieni(judet, stare);
CREATE INDEX IF NOT EXISTS idx_anre_electricieni_nume_trgm ON anre.electricieni USING gin (nume_prenume gin_trgm_ops);
COMMENT ON TABLE anre.electricieni IS
'ANRE — electricieni autorizati (persoane fizice). Source: portal.anre.ro/PublicLists/AutorizatiiElectricieniAutorizati.';
-- ── 3. Scrape log (mirrors aep.scrape_log convention) ──────────────────────
CREATE TABLE IF NOT EXISTS anre.scrape_log (
id bigserial PRIMARY KEY,
scraper text NOT NULL, -- 'electricitate' / 'gaze' / 'atestat' / 'electricieni'
source_url text NOT NULL,
rows_seen integer NOT NULL DEFAULT 0,
rows_inserted integer NOT NULL DEFAULT 0,
rows_updated integer NOT NULL DEFAULT 0,
rows_skipped integer NOT NULL DEFAULT 0,
duration_ms integer NOT NULL DEFAULT 0,
started_at timestamptz NOT NULL,
finished_at timestamptz NOT NULL DEFAULT now(),
error text
);
CREATE INDEX IF NOT EXISTS idx_anre_scrape_log_started ON anre.scrape_log(started_at DESC);
-- ── 4. Materialized view: per-CUI license rollup ───────────────────────────
-- Joinable with seap.announcements.supplier_cui to detect licensed-vs-unlicensed
-- energy contractors.
CREATE MATERIALIZED VIEW IF NOT EXISTS anre.mv_licente_per_cui AS
SELECT
titular_cui AS cui,
COUNT(*) AS nr_licente_total,
COUNT(*) FILTER (WHERE license_source = 'electricitate') AS nr_electricitate,
COUNT(*) FILTER (WHERE license_source = 'gaze') AS nr_gaze,
COUNT(*) FILTER (WHERE license_source = 'atestat') AS nr_atestate,
COUNT(*) FILTER (WHERE stare ILIKE 'Acord%' OR stare ILIKE 'Activ%') AS nr_active,
COUNT(*) FILTER (WHERE stare ILIKE 'Expir%') AS nr_expirate,
COUNT(*) FILTER (WHERE stare ILIKE 'Retras%' OR stare ILIKE 'Suspend%') AS nr_retrase,
array_agg(DISTINCT license_subtype) FILTER (WHERE license_subtype IS NOT NULL) AS subtipuri,
array_agg(DISTINCT license_source) AS surse,
MIN(data_emitere) AS prima_emitere,
MAX(data_emitere) AS ultima_emitere,
MAX(data_expirare) AS ultima_expirare
FROM anre.licente
WHERE titular_cui IS NOT NULL
GROUP BY titular_cui;
CREATE UNIQUE INDEX IF NOT EXISTS idx_anre_mv_licente_per_cui ON anre.mv_licente_per_cui(cui);
COMMENT ON MATERIALIZED VIEW anre.mv_licente_per_cui IS
'Rollup of ANRE licenses per CUI. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY anre.mv_licente_per_cui';