initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
@@ -0,0 +1,197 @@
|
||||
-- 031_cnas.sql
|
||||
-- CNAS — Casa Națională de Asigurări de Sănătate.
|
||||
-- Lista furnizorilor de servicii medicale aflați în relație contractuală cu CAS-urile județene.
|
||||
--
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- Source landscape (investigated 2026-05-10):
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- The CNAS source ecosystem is in active migration. There are 3 layers:
|
||||
--
|
||||
-- 1. cnas.ro/wp-content/uploads/ — central WordPress media library, 4.18K
|
||||
-- files total. ~70-90 are "furnizori" PDFs (national + per-județ snapshots).
|
||||
-- Discoverable via /wp-json/wp/v2/media REST API. 99% PDF, ~5 XLSX.
|
||||
-- → THIS IS THE PRIMARY SOURCE for v1.
|
||||
--
|
||||
-- 2. cas.cnas.ro/casXX — new Angular SPA per județ (42 sub-instances). The
|
||||
-- backend admin/api is a Blazor app at /admin/api/* with X-Instance-Key
|
||||
-- header routing. As of 2026-05, most endpoints return [] or 500. The data
|
||||
-- hasn't been migrated to the new infra yet.
|
||||
-- → DEFERRED — track via watch script, ingest when populated.
|
||||
--
|
||||
-- 3. www.cnas.ro/casXX/page/lista-furnizori-*.html — old pre-migration WP.
|
||||
-- 301-redirects to cnas.ro/casXX/ which is now a stub. Effectively dead
|
||||
-- for our purposes; some files survive in CKEditor uploads on the old
|
||||
-- cas.cnas.ro/casXX/theme/cnas/js/ckeditor/filemanager/userfiles/...
|
||||
-- → DEFERRED — best harvested via Wayback CDX.
|
||||
--
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- File naming convention (cnas.ro/wp-content/uploads/):
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- Most files DON'T contain CUI codes. They contain provider names + sediu +
|
||||
-- contact data. CUI matching is post-ingest via firms.normalize_company_name
|
||||
-- + trgm fuzzy search (mirroring anre.licente pattern).
|
||||
--
|
||||
-- Filename signals tip_serviciu:
|
||||
-- FURNIZORI-SPITALE-IN-CONTRACT-2024.pdf → tip='spital'
|
||||
-- FURNIZORI-IN-CONTRACT-MEDICINA-DE-FAMILIE.. → tip='medicina_familie'
|
||||
-- FURNIZORI-DE-SERVICII-FARMACEUTICE-.. → tip='farmacie'
|
||||
-- FURNIZORI-DISPOZITIVE-MEDICALE-.. → tip='dispozitive_medicale'
|
||||
-- FURNIZORI-MEDICINA-DENTARA-.. → tip='medicina_dentara'
|
||||
-- FURNIZORI-INGRIJIRI-MEDICALE-.. → tip='ingrijiri_medicale'
|
||||
-- FURNIZORI-INGRIJIRI-PALIATIVE-.. → tip='ingrijiri_paliative'
|
||||
-- FURNIZORI-RECUPERARE-MEDICALA-.. → tip='recuperare_medicala'
|
||||
-- FURNIZORI-PNS-.. → tip='pns' (programe nationale)
|
||||
-- FURNIZORI-IN-CONTRACT-AMBULATORIU-CLINIC-.. → tip='ambulatoriu_clinic'
|
||||
-- FURNIZORI-IN-CONTRACT-AMBULATORIU-PARACLIN.. → tip='paraclinic'
|
||||
-- FURNIZORI-..-URGENTA-PRESPITALICEASCA-.. → tip='urgenta_transport'
|
||||
-- Lista-furnizori-clinice-.. → tip='clinic'
|
||||
-- Lista-furnizori-RECA-.. → tip='recuperare_a'
|
||||
-- Lista-furnizori-radioterapie-.. → tip='radioterapie'
|
||||
-- Lista-furnizori-testare-genetica-.. → tip='testare_genetica'
|
||||
-- Lista-furnizori-tumori-solide-maligne-.. → tip='oncologie'
|
||||
--
|
||||
-- Județ extraction is heuristic: from filename (e.g. CAS-GORJ, CAS-ARAD) OR
|
||||
-- from PDF page header ("CASA DE ASIGURARI A JUDETULUI GORJ"). When both fail,
|
||||
-- it's a national list (rare — most centrally-uploaded files are actually
|
||||
-- per-județ, as the PDFs are produced by individual CAS-uri).
|
||||
--
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- Cross-source value:
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- 1. cnas.furnizori.cui (resolved post-ingest) × seap.announcements.supplier_cui
|
||||
-- @ CPV 33.* (medical equipment) / 85.* (medical services) =
|
||||
-- "medical providers winning state contracts directly + via insurance"
|
||||
-- 2. cnas.furnizori.cui × anaf_datornici.datornic.cui = "spitale & clinici cu
|
||||
-- datorii la stat" — red-flag pattern.
|
||||
-- 3. cnas.furnizori.cui × fonduri.proiect.beneficiar_cui (POIM-Sănătate) =
|
||||
-- EU-funded healthcare providers.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS cnas;
|
||||
|
||||
-- ── 1. Documents (file metadata catalog) ───────────────────────────────────
|
||||
-- Tracks every PDF/XLSX harvested from cnas.ro WP media. One row per file URL.
|
||||
-- Idempotent re-fetch: same URL → UPDATE fetched_at + parsed_at.
|
||||
CREATE TABLE IF NOT EXISTS cnas.documents (
|
||||
id bigserial PRIMARY KEY,
|
||||
source_url text NOT NULL UNIQUE,
|
||||
source text NOT NULL, -- 'wp-media' | 'cas-cnas-spa' | 'wayback' | 'manual'
|
||||
wp_media_id bigint, -- /wp-json/wp/v2/media id (when source='wp-media')
|
||||
title text,
|
||||
filename text,
|
||||
mime_type text,
|
||||
file_size_bytes integer,
|
||||
file_sha1 char(40), -- of downloaded bytes (used to detect changes)
|
||||
published_at timestamptz, -- WP "date" (publication on CNAS site)
|
||||
-- Inferred classification (heuristic from filename + content):
|
||||
tip_serviciu text, -- 'spital' / 'farmacie' / 'medicina_familie' / etc. (see header above)
|
||||
judet text, -- "GORJ" / "BIHOR" / NULL when national or unknown
|
||||
perioada text, -- raw period from filename ("01.07.2024", "decembrie-2024")
|
||||
page_count integer, -- for PDFs, post-parse
|
||||
parse_status text NOT NULL DEFAULT 'pending',-- 'pending' / 'ok' / 'failed' / 'unsupported_format' / 'no_table'
|
||||
parse_error text,
|
||||
rows_extracted integer NOT NULL DEFAULT 0,
|
||||
fetched_at timestamptz NOT NULL DEFAULT now(),
|
||||
parsed_at timestamptz
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_documents_tip ON cnas.documents(tip_serviciu);
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_documents_judet ON cnas.documents(judet);
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_documents_published ON cnas.documents(published_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_documents_parse_status ON cnas.documents(parse_status);
|
||||
|
||||
COMMENT ON TABLE cnas.documents IS
|
||||
'Catalog of every CNAS provider-list document (PDF/XLSX) harvested. One row per source URL. Source: cnas.ro/wp-json/wp/v2/media + future SPA endpoints.';
|
||||
COMMENT ON COLUMN cnas.documents.tip_serviciu IS
|
||||
'Service category inferred from filename: spital, farmacie, medicina_familie, medicina_dentara, dispozitive_medicale, ambulatoriu_clinic, paraclinic, ingrijiri_medicale, ingrijiri_paliative, recuperare_medicala, urgenta_transport, pns, clinic, oncologie, testare_genetica, radioterapie, other.';
|
||||
|
||||
-- ── 2. Furnizori (extracted provider records) ──────────────────────────────
|
||||
-- One row per (document, NR_CRT). Provider name is the fundamental key — CUI
|
||||
-- is resolved POST-INSERT via firms.normalize_company_name fuzzy match (mirror
|
||||
-- of anre.licente pattern).
|
||||
--
|
||||
-- We allow the same legal entity (same CUI) to appear MULTIPLE times across
|
||||
-- documents (e.g. same hospital listed in spitale + paraclinic + clinic lists).
|
||||
-- Dedup is via mv_cnas_per_cui rollup, not at insert time.
|
||||
CREATE TABLE IF NOT EXISTS cnas.furnizori (
|
||||
id bigserial PRIMARY KEY,
|
||||
document_id bigint NOT NULL REFERENCES cnas.documents(id) ON DELETE CASCADE,
|
||||
-- Document context (denormalized for fast filtering):
|
||||
tip_serviciu text, -- inherited from document
|
||||
judet text, -- inherited from document (or row-level when available)
|
||||
perioada text, -- "la 01.07.2024" etc.
|
||||
-- Source row data:
|
||||
nr_crt integer, -- in-document index (1..N)
|
||||
cod_furnizor text, -- "BH01" / "CT12" — CAS-internal supplier code (when present)
|
||||
name text NOT NULL, -- raw from document ("Spitalul Clinic Județean Oradea", "DR.HEIM HERMINA")
|
||||
name_norm text, -- firms.normalize_company_name — populated post-insert
|
||||
reprezentant text, -- legal rep / cabinet doctor (for medicina familie, dentara)
|
||||
sediu text, -- adresa
|
||||
localitate text,
|
||||
telefon text,
|
||||
fax text,
|
||||
email text,
|
||||
specialitate text, -- pentru ambulatoriu, paraclinic, clinic
|
||||
-- CUI matching (resolved post-ingest):
|
||||
cui text,
|
||||
cui_match_score numeric(4,3),
|
||||
cui_match_method text, -- 'exact_norm' / 'trgm_unique' / 'trgm_judet' / 'manual'
|
||||
matched_at timestamptz,
|
||||
raw_text text, -- the raw text-row from PDF for debugging
|
||||
fetched_at timestamptz NOT NULL DEFAULT now(),
|
||||
UNIQUE (document_id, nr_crt, name)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_furn_cui ON cnas.furnizori(cui) WHERE cui IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_furn_judet_tip ON cnas.furnizori(judet, tip_serviciu);
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_furn_name_trgm ON cnas.furnizori USING gin (name_norm gin_trgm_ops);
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_furn_document ON cnas.furnizori(document_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_furn_localitate ON cnas.furnizori(localitate) WHERE localitate IS NOT NULL;
|
||||
|
||||
COMMENT ON TABLE cnas.furnizori IS
|
||||
'Furnizori de servicii medicale extrași din PDF-urile CNAS. One row per (document, nr_crt, name). CUI resolved post-insert via firms.normalize_company_name + trgm fuzzy.';
|
||||
COMMENT ON COLUMN cnas.furnizori.cod_furnizor IS
|
||||
'Internal CAS supplier code, e.g. "BH01" (Bihor seq 01), "MB427" (București-Ilfov seq 427). NOT a CUI.';
|
||||
COMMENT ON COLUMN cnas.furnizori.cui IS
|
||||
'Resolved CUI via firms.normalize_company_name + pg_trgm match. NULL = unmatched; cabinete medicale individuale (CMI) often have no CUI in firms registry.';
|
||||
|
||||
-- ── 3. Scrape log ─────────────────────────────────────────────────────────
|
||||
-- Mirrors anre.scrape_log convention.
|
||||
CREATE TABLE IF NOT EXISTS cnas.scrape_log (
|
||||
id bigserial PRIMARY KEY,
|
||||
scraper text NOT NULL, -- 'wp-media-list' / 'parse-pdf' / 'match-cui' / 'cas-cnas-spa'
|
||||
source_url text,
|
||||
rows_seen integer NOT NULL DEFAULT 0,
|
||||
rows_inserted integer NOT NULL DEFAULT 0,
|
||||
rows_updated integer NOT NULL DEFAULT 0,
|
||||
rows_skipped integer NOT NULL DEFAULT 0,
|
||||
duration_ms integer NOT NULL DEFAULT 0,
|
||||
started_at timestamptz NOT NULL,
|
||||
finished_at timestamptz NOT NULL DEFAULT now(),
|
||||
error text
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_cnas_scrape_log_started ON cnas.scrape_log(started_at DESC);
|
||||
|
||||
-- ── 4. Materialized view: per-CUI provider rollup ─────────────────────────
|
||||
-- Joinable with seap.announcements + anaf_datornici + fonduri.proiect for
|
||||
-- cross-source detection. Refreshed via cron after match-cui pass.
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS cnas.mv_furnizori_per_cui AS
|
||||
SELECT
|
||||
cui,
|
||||
COUNT(*) AS nr_aparitii, -- across all lists
|
||||
COUNT(DISTINCT tip_serviciu) FILTER (WHERE tip_serviciu IS NOT NULL) AS nr_tipuri_serviciu,
|
||||
COUNT(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS nr_judete,
|
||||
array_agg(DISTINCT tip_serviciu) FILTER (WHERE tip_serviciu IS NOT NULL) AS tipuri_serviciu,
|
||||
array_agg(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS judete,
|
||||
-- One representative name (most common):
|
||||
(array_agg(name ORDER BY length(name)))[1] AS name_sample,
|
||||
MIN(fetched_at) AS first_seen,
|
||||
MAX(fetched_at) AS last_seen
|
||||
FROM cnas.furnizori
|
||||
WHERE cui IS NOT NULL
|
||||
GROUP BY cui;
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnas_mv_per_cui ON cnas.mv_furnizori_per_cui(cui);
|
||||
|
||||
COMMENT ON MATERIALIZED VIEW cnas.mv_furnizori_per_cui IS
|
||||
'Per-CUI rollup of CNAS provider appearances. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnas.mv_furnizori_per_cui;';
|
||||
Reference in New Issue
Block a user