Files
vreau-digital/services/seap-scraper/sql/029_ancom.sql
T
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

132 lines
7.8 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- 029_ancom.sql
-- ANCOM — Autoritatea Națională pentru Administrare și Reglementare în Comunicații.
-- Public registry of authorized providers of electronic communications networks
-- and services. Source:
-- https://www.ancom.ro/reglementare-ro/comunicatii-electronice/
-- furnizori-comunicatii-electronice/
-- lista-furnizorilor-de-retele-si-servicii-de-comunicatii-autorizati/
--
-- The list is paginated server-side (10 rows/page, ~57 pages → ~570 furnizori).
-- Each row links to a HTML detail page at:
-- https://www.ancom.ro/sablon/furnizorinew_23/?id={id}&pid=4186
--
-- The detail page exposes:
-- • Denumire (titular)
-- • Adresa, Oras/Comuna, Judet/Sector
-- • Cod unic de înregistrare (CUI) — direct, no fuzzy match needed
-- • EUID (Registrul Comerțului) — e.g. ROONRC.J16/3108/1992
-- • R1..R11 — tipuri de retele (Fire metalice, Coaxial, Fibra optica, Mobil,
-- Spectru radio, etc.) cu "Data nasterii dreptului"
-- • S1..S12 — tipuri de servicii (Internet la puncte fixe, Voce mobil,
-- Comunicații interpersonale, etc.)
--
-- Cross-source value:
-- ancom.operatori.titular_cui × seap.announcements.supplier_cui = furnizori
-- telco cu contracte publice. Inverse (anunturi telco CPV 32/64 cu supplier
-- NU în ancom.operatori) = potențial neautorizat.
--
-- Schema layout:
-- 1. ancom.operatori — flat row per provider (CUI direct from page)
-- 2. ancom.drepturi — long table: 1 row per (operator, R/S code)
-- cu data nasterii dreptului. Permite filtrare
-- pe tip retea/serviciu (R3=fibra optica etc.)
-- 3. ancom.scrape_log — mirrors anre.scrape_log convention
-- 4. ancom.mv_operatori_per_cui — rollup pentru join cu seap.announcements
-- Dedicated schema holding every ANCOM object defined in this file.
-- IF NOT EXISTS makes the statement a no-op when the schema is already present.
CREATE SCHEMA IF NOT EXISTS ancom;
-- ── 1. Operators (authorized providers) ─────────────────────────────────────
-- One row per registry entry. ancom_id is the numeric id taken from the detail
-- URL (sablon/furnizorinew_23/?id={N}) and is the primary key because it is the
-- registry's own natural unique key.

-- The trigram GIN index on titular_name_norm (below) uses the gin_trgm_ops
-- operator class, which is shipped by the pg_trgm extension. Ensuring the
-- extension here keeps this migration runnable on a database where no earlier
-- migration has installed it; when it is already installed this is a no-op.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

CREATE TABLE IF NOT EXISTS ancom.operatori (
    ancom_id          integer      PRIMARY KEY,                   -- ?id={N} in detail URL
    titular_name      text         NOT NULL,                      -- raw name as shown in the list table
    titular_name_norm text,                                       -- firms.normalize_company_name() output; fallback key when the CUI is unmatched
    titular_cui       text,                                       -- taken directly from the detail page; kept as text, e.g. '3071154'
    cui_match_method  text,                                       -- 'direct' (from page) | 'exact_norm' | 'trgm_unique' | 'trgm_judet'
    cui_match_score   numeric(4,3),                               -- score of the match; NOTE(review): presumably 0..1 — confirm with the matcher
    matched_at        timestamptz,                                -- when the CUI match was recorded
    euid              text,                                       -- Trade Registry identifier, e.g. 'ROONRC.J16/3108/1992'
    adresa            text,                                       -- street address from the detail page
    oras              text,                                       -- city / commune
    judet             text,                                       -- county or Bucharest sector: 'DOLJ', 'SECTOR 1', etc.
    list_judet        text,                                       -- county as shown in the list view (may differ from the detail page)
    detail_url        text         NOT NULL,                      -- canonical detail-page URL
    status            text         NOT NULL DEFAULT 'autorizat',  -- 'autorizat' | 'radiat' | 'sanctionat' | 'inactiv'
    raw_html_hash     text,                                       -- sha1 of the detail HTML body, used for change detection
    fetched_at        timestamptz  NOT NULL DEFAULT now()         -- when this row was last fetched
);

-- Partial index: only rows that actually carry a CUI are useful for CUI lookups/joins.
CREATE INDEX IF NOT EXISTS idx_ancom_operatori_cui
    ON ancom.operatori (titular_cui)
    WHERE titular_cui IS NOT NULL;

-- Trigram index backing similarity search on the normalized name (requires pg_trgm).
CREATE INDEX IF NOT EXISTS idx_ancom_operatori_name_norm
    ON ancom.operatori USING gin (titular_name_norm gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_ancom_operatori_judet
    ON ancom.operatori (judet);

CREATE INDEX IF NOT EXISTS idx_ancom_operatori_status
    ON ancom.operatori (status);

COMMENT ON TABLE ancom.operatori IS
    'ANCOM authorized communications providers. One row per ancom_id from registry. Source: ancom.ro/reglementare-ro/.../lista-furnizorilor-...autorizati/';

COMMENT ON COLUMN ancom.operatori.ancom_id IS
    'Natural unique key from detail URL ?id={N}&pid=4186. Stable across scrapes.';

COMMENT ON COLUMN ancom.operatori.titular_cui IS
    'CUI direct from detail page "Cod unic de înregistrare". Most rows match — fuzzy fallback used only when missing.';
-- ── 2. Rights (R1..R11 + S1..S12 catalog) ──────────────────────────────────
-- Long-format table holding one row per (operator, right code). It supports
-- questions such as:
--   "how many providers hold the S2 (mobile) right?"
--   "how many providers in Cluj hold R3 (fiber optic)?"
CREATE TABLE IF NOT EXISTS ancom.drepturi (
    ancom_id      integer NOT NULL,  -- owning operator (see foreign key below)
    cod           text    NOT NULL,  -- 'R1' .. 'R11' | 'S1' .. 'S12'
    tip           text    NOT NULL,  -- 'retea' | 'serviciu'
    descriere     text,              -- 'Fire metalice (DSL)' / 'Internet la puncte fixe' / etc.
    data_nasterii date,              -- date the right came into effect ("Data nasterii dreptului")

    -- An operator can hold each code at most once.
    PRIMARY KEY (ancom_id, cod),

    -- Rights are removed together with the operator they belong to.
    FOREIGN KEY (ancom_id)
        REFERENCES ancom.operatori (ancom_id)
        ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_ancom_drepturi_cod
    ON ancom.drepturi (cod);

CREATE INDEX IF NOT EXISTS idx_ancom_drepturi_tip
    ON ancom.drepturi (tip);

COMMENT ON TABLE ancom.drepturi IS
    'Drepturile fiecarui furnizor — R1..R11 (retele) + S1..S12 (servicii) cu data nasterii dreptului. Long table, one row per (operator, code).';
-- ── 3. Scrape log ──────────────────────────────────────────────────────────
-- One row per scraper run: which scraper ran, against which URL, row counters,
-- timing, and an optional error text. The file header notes that this table
-- mirrors the anre.scrape_log convention.
CREATE TABLE IF NOT EXISTS ancom.scrape_log (
    id            bigserial,
    scraper       text        NOT NULL,                -- 'autorizati' | 'radiati' | 'sanctionati'
    source_url    text        NOT NULL,
    rows_seen     integer     NOT NULL DEFAULT 0,
    rows_inserted integer     NOT NULL DEFAULT 0,
    rows_updated  integer     NOT NULL DEFAULT 0,
    rows_skipped  integer     NOT NULL DEFAULT 0,
    duration_ms   integer     NOT NULL DEFAULT 0,      -- run duration in milliseconds
    started_at    timestamptz NOT NULL,                -- must be supplied by the writer (no default)
    finished_at   timestamptz NOT NULL DEFAULT now(),  -- defaults to the insert time
    error         text,                                -- nullable; NOTE(review): presumably NULL on success — confirm with the scraper
    PRIMARY KEY (id)
);

-- Descending index on started_at, matching "latest runs first" ordering.
CREATE INDEX IF NOT EXISTS idx_ancom_scrape_log_started
    ON ancom.scrape_log (started_at DESC);
-- ── 4. Per-CUI rollup (joinable with seap.announcements.supplier_cui) ──────
-- One row per titular_cui, aggregating every registry entry (ancom_id) that
-- carries that CUI together with all of its rights from ancom.drepturi.
-- Operators without a CUI are excluded.
--
-- Why COUNT(DISTINCT o.ancom_id): the LEFT JOIN to ancom.drepturi fans each
-- operator out into one row per right, so COUNT(*) would report the number of
-- (operator, right) pairs instead of the number of registry entries.
--
-- Why COALESCE(bool_or(...), false): bool_or() ignores NULL inputs and yields
-- NULL when every input is NULL, which happens for a CUI whose operators have
-- no rows in ancom.drepturi. Such a CUI must read as "does not have the right".
--
-- NOTE: because of IF NOT EXISTS, a database that already contains this view
-- keeps its existing definition; changing the definition there requires a
-- DROP MATERIALIZED VIEW followed by re-running this statement.
CREATE MATERIALIZED VIEW IF NOT EXISTS ancom.mv_operatori_per_cui AS
SELECT
    o.titular_cui                                        AS cui,
    COUNT(DISTINCT o.ancom_id)                           AS nr_autorizatii,
    array_agg(DISTINCT o.ancom_id ORDER BY o.ancom_id)   AS ancom_ids,
    -- Codes are sorted as text, so 'R10' sorts before 'R2'.
    -- Both arrays are NULL when the CUI has no right of the respective kind.
    array_agg(DISTINCT d.cod ORDER BY d.cod)
        FILTER (WHERE d.tip = 'retea')                   AS retele,
    array_agg(DISTINCT d.cod ORDER BY d.cod)
        FILTER (WHERE d.tip = 'serviciu')                AS servicii,
    -- NOTE(review): S1 = fixed internet is inferred from the column name; confirm against the ANCOM code list.
    COALESCE(bool_or(d.cod = 'S1'), false)               AS are_internet_fix,
    COALESCE(bool_or(d.cod = 'S2'), false)               AS are_mobil,
    COALESCE(bool_or(d.cod = 'R3'), false)               AS are_fibra,
    -- status is NOT NULL and every group has at least one operator, so no COALESCE is needed here.
    bool_or(o.status = 'autorizat')                      AS are_status_activ,
    MIN(d.data_nasterii)                                 AS prima_autorizare,
    MAX(d.data_nasterii)                                 AS ultima_autorizare,
    MAX(o.fetched_at)                                    AS ultima_actualizare
FROM ancom.operatori AS o
LEFT JOIN ancom.drepturi AS d
    ON d.ancom_id = o.ancom_id
WHERE o.titular_cui IS NOT NULL
GROUP BY o.titular_cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_ancom_mv_per_cui
    ON ancom.mv_operatori_per_cui (cui);

COMMENT ON MATERIALIZED VIEW ancom.mv_operatori_per_cui IS
    'Rollup ANCOM per CUI (autorizatii + tipuri de retele/servicii). Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY ancom.mv_operatori_per_cui';