-- 029_ancom.sql
-- ANCOM — Autoritatea Națională pentru Administrare și Reglementare în
-- Comunicații (National Authority for Management and Regulation in
-- Communications).
-- Public registry of authorized providers of electronic communications
-- networks and services. Source:
--   https://www.ancom.ro/reglementare-ro/comunicatii-electronice/
--     furnizori-comunicatii-electronice/
--     lista-furnizorilor-de-retele-si-servicii-de-comunicatii-autorizati/
--
-- The list is paginated server-side (10 rows/page, ~57 pages → ~570 providers).
-- Each row links to an HTML detail page at:
--   https://www.ancom.ro/sablon/furnizorinew_23/?id={id}&pid=4186
--
-- The detail page exposes:
--   • Denumire (holder name)
--   • Adresa, Oras/Comuna, Judet/Sector (address, city/commune, county/sector)
--   • Cod unic de înregistrare (CUI) — direct, no fuzzy match needed
--   • EUID (Trade Registry) — e.g. ROONRC.J16/3108/1992
--   • R1..R11 — network types (metallic wire, coaxial, optical fiber, mobile,
--               radio spectrum, etc.), each with a "Data nasterii dreptului"
--               (date on which the right arose)
--   • S1..S12 — service types (fixed-point internet, mobile voice,
--               interpersonal communications, etc.)
--
-- Cross-source value:
--   ancom.operatori.titular_cui × seap.announcements.supplier_cui = telecom
--   providers holding public contracts. The inverse (telecom announcements,
--   CPV 32/64, whose supplier is NOT in ancom.operatori) = potentially
--   unauthorized.
--
-- Schema layout:
--   1. ancom.operatori            — flat row per provider (CUI direct from page)
--   2. ancom.drepturi             — long table: 1 row per (operator, R/S code)
--                                   with the date the right arose. Allows
--                                   filtering by network/service type
--                                   (R3 = optical fiber, etc.)
--   3. ancom.scrape_log           — mirrors the anre.scrape_log convention
--   4. ancom.mv_operatori_per_cui — rollup for joining with seap.announcements

CREATE SCHEMA IF NOT EXISTS ancom;

-- ── 1. Operatori (authorized providers) ────────────────────────────────────
-- One row per ancom.id (the registry's numeric id from sablon/furnizorinew_23).
-- ancom_id is the PK because it is the natural unique key in the registry.
CREATE TABLE IF NOT EXISTS ancom.operatori (
    ancom_id           integer,                                   -- ?id={N} in the detail URL
    titular_name       text        NOT NULL,                      -- raw value from the list table
    titular_name_norm  text,                                      -- firms.normalize_company_name() — fallback when the CUI is unmatched
    titular_cui        text,                                      -- taken directly from the detail page; stored as a string ('3071154')
    cui_match_method   text,                                      -- 'direct' (from page) | 'exact_norm' | 'trgm_unique' | 'trgm_judet'
    cui_match_score    numeric(4,3),
    matched_at         timestamptz,
    euid               text,                                      -- 'ROONRC.J16/3108/1992' — Trade Registry identifier
    adresa             text,
    oras               text,
    judet              text,                                      -- 'DOLJ', 'SECTOR 1', etc.
    list_judet         text,                                      -- county as shown in the list (may differ from the detail page)
    detail_url         text        NOT NULL,                      -- canonical URL
    status             text        NOT NULL DEFAULT 'autorizat',  -- 'autorizat' | 'radiat' | 'sanctionat' | 'inactiv'
    raw_html_hash      text,                                      -- sha1 of the detail HTML body — change detection
    fetched_at         timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (ancom_id)
);

-- Partial index: only rows that actually carry a CUI are indexed.
CREATE INDEX IF NOT EXISTS idx_ancom_operatori_cui
    ON ancom.operatori (titular_cui)
    WHERE titular_cui IS NOT NULL;

-- Trigram index on the normalized name.
-- NOTE(review): gin_trgm_ops needs the pg_trgm extension, which this file does
-- not create — presumably installed by an earlier migration; confirm.
CREATE INDEX IF NOT EXISTS idx_ancom_operatori_name_norm
    ON ancom.operatori
    USING gin (titular_name_norm gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_ancom_operatori_judet
    ON ancom.operatori (judet);

CREATE INDEX IF NOT EXISTS idx_ancom_operatori_status
    ON ancom.operatori (status);

COMMENT ON TABLE ancom.operatori
    IS 'ANCOM authorized communications providers. One row per ancom_id from registry. Source: ancom.ro/reglementare-ro/.../lista-furnizorilor-...autorizati/';

COMMENT ON COLUMN ancom.operatori.ancom_id
    IS 'Natural unique key from detail URL ?id={N}&pid=4186. Stable across scrapes.';

COMMENT ON COLUMN ancom.operatori.titular_cui
    IS 'CUI direct from detail page "Cod unic de înregistrare". Most rows match — fuzzy fallback used only when missing.';

-- ── 2. Drepturi / rights (R1..R11 + S1..S12 catalog) ───────────────────────
-- Long table — one row per (operator, code). Lets us answer:
--   "how many providers hold an active S2 (mobile) right?"
--   "in Cluj, how many providers hold R3 (optical fiber)?"
CREATE TABLE IF NOT EXISTS ancom.drepturi (
    ancom_id       integer NOT NULL,
    cod            text    NOT NULL,  -- 'R1' .. 'R11' | 'S1' .. 'S12'
    tip            text    NOT NULL,  -- 'retea' | 'serviciu'
    descriere      text,              -- 'Fire metalice (DSL)' / 'Internet la puncte fixe' / etc.
    data_nasterii  date,              -- "Data nasterii dreptului" (date on which the right arose)
    PRIMARY KEY (ancom_id, cod),
    -- Rights rows are removed together with their operator.
    FOREIGN KEY (ancom_id) REFERENCES ancom.operatori (ancom_id) ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_ancom_drepturi_cod
    ON ancom.drepturi (cod);

CREATE INDEX IF NOT EXISTS idx_ancom_drepturi_tip
    ON ancom.drepturi (tip);

COMMENT ON TABLE ancom.drepturi
    IS 'Drepturile fiecarui furnizor — R1..R11 (retele) + S1..S12 (servicii) cu data nasterii dreptului. Long table, one row per (operator, code).';

-- ── 3. Scrape log ──────────────────────────────────────────────────────────
-- One row per scraper run: row counters, timing and an optional error message.
CREATE TABLE IF NOT EXISTS ancom.scrape_log (
    id             bigserial,
    scraper        text        NOT NULL,            -- 'autorizati' | 'radiati' | 'sanctionati'
    source_url     text        NOT NULL,
    rows_seen      integer     NOT NULL DEFAULT 0,
    rows_inserted  integer     NOT NULL DEFAULT 0,
    rows_updated   integer     NOT NULL DEFAULT 0,
    rows_skipped   integer     NOT NULL DEFAULT 0,
    duration_ms    integer     NOT NULL DEFAULT 0,
    started_at     timestamptz NOT NULL,
    finished_at    timestamptz NOT NULL DEFAULT now(),
    error          text,
    PRIMARY KEY (id)
);

-- Newest runs first.
CREATE INDEX IF NOT EXISTS idx_ancom_scrape_log_started
    ON ancom.scrape_log (started_at DESC);

-- ── 4.
-- Per-CUI rollup (joinable with seap.announcements.supplier_cui) ──────────
-- One row per non-NULL titular_cui, aggregating every registry entry
-- (ancom_id) that carries that CUI together with the rights attached to it.
--
-- Column notes:
--   nr_autorizatii    number of distinct registry entries for the CUI.
--   retele / servicii distinct right codes per type, in text sort order;
--                     NULL when the CUI has no right of that type.
--   are_internet_fix, are_mobil, are_fibra
--                     NULL (not false) when the CUI has no rows at all in
--                     ancom.drepturi, because bool_or sees only NULL inputs.
--   are_status_activ  true when at least one entry has status 'autorizat'.
--
-- NOTE: IF NOT EXISTS makes this statement a no-op when the view already
-- exists, so a database that already has the view keeps its old definition
-- until the view is dropped and re-created.
CREATE MATERIALIZED VIEW IF NOT EXISTS ancom.mv_operatori_per_cui AS
SELECT
    o.titular_cui                                       AS cui,
    -- DISTINCT is required: the LEFT JOIN below yields one row per
    -- (operator, right), so COUNT(*) would count rights rows instead of
    -- registry entries and disagree with ancom_ids.
    COUNT(DISTINCT o.ancom_id)                          AS nr_autorizatii,
    array_agg(DISTINCT o.ancom_id ORDER BY o.ancom_id)  AS ancom_ids,
    -- Explicit ORDER BY keeps the element order deterministic across refreshes.
    array_agg(DISTINCT d.cod ORDER BY d.cod)
        FILTER (WHERE d.tip = 'retea')                  AS retele,
    array_agg(DISTINCT d.cod ORDER BY d.cod)
        FILTER (WHERE d.tip = 'serviciu')               AS servicii,
    bool_or(d.cod = 'S1')                               AS are_internet_fix,
    bool_or(d.cod = 'S2')                               AS are_mobil,
    bool_or(d.cod = 'R3')                               AS are_fibra,
    bool_or(o.status = 'autorizat')                     AS are_status_activ,
    MIN(d.data_nasterii)                                AS prima_autorizare,
    MAX(d.data_nasterii)                                AS ultima_autorizare,
    MAX(o.fetched_at)                                   AS ultima_actualizare
FROM ancom.operatori AS o
-- LEFT JOIN so that operators without any rights rows still appear.
LEFT JOIN ancom.drepturi AS d
    ON d.ancom_id = o.ancom_id
WHERE o.titular_cui IS NOT NULL
GROUP BY o.titular_cui;

-- A unique index is a prerequisite for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_ancom_mv_per_cui
    ON ancom.mv_operatori_per_cui (cui);

COMMENT ON MATERIALIZED VIEW ancom.mv_operatori_per_cui
    IS 'Rollup ANCOM per CUI (autorizatii + tipuri de retele/servicii). Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY ancom.mv_operatori_per_cui';