initial: split from gov-agreg — vreau.digital standalone platform

Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
Claude VM
2026-05-13 00:10:32 +03:00
commit a6c03a091e
352 changed files with 75295 additions and 0 deletions
+197
View File
@@ -0,0 +1,197 @@
-- 031_cnas.sql
-- CNAS — Casa Națională de Asigurări de Sănătate.
-- Lista furnizorilor de servicii medicale aflați în relație contractuală cu CAS-urile județene.
--
-- ───────────────────────────────────────────────────────────────────────────
-- Source landscape (investigated 2026-05-10):
-- ───────────────────────────────────────────────────────────────────────────
-- The CNAS source ecosystem is in active migration. There are 3 layers:
--
-- 1. cnas.ro/wp-content/uploads/ — central WordPress media library, 4.18K
-- files total. ~70-90 are "furnizori" PDFs (national + per-județ snapshots).
-- Discoverable via /wp-json/wp/v2/media REST API. 99% PDF, ~5 XLSX.
-- → THIS IS THE PRIMARY SOURCE for v1.
--
-- 2. cas.cnas.ro/casXX — new Angular SPA per județ (42 sub-instances). The
-- backend admin/api is a Blazor app at /admin/api/* with X-Instance-Key
-- header routing. As of 2026-05, most endpoints return [] or 500. The data
-- hasn't been migrated to the new infra yet.
-- → DEFERRED — track via watch script, ingest when populated.
--
-- 3. www.cnas.ro/casXX/page/lista-furnizori-*.html — old pre-migration WP.
-- 301-redirects to cnas.ro/casXX/ which is now a stub. Effectively dead
-- for our purposes; some files survive in CKEditor uploads on the old
-- cas.cnas.ro/casXX/theme/cnas/js/ckeditor/filemanager/userfiles/...
-- → DEFERRED — best harvested via Wayback CDX.
--
-- ───────────────────────────────────────────────────────────────────────────
-- File naming convention (cnas.ro/wp-content/uploads/):
-- ───────────────────────────────────────────────────────────────────────────
-- Most files DON'T contain CUI codes. They contain provider names + sediu +
-- contact data. CUI matching is post-ingest via firms.normalize_company_name
-- + trgm fuzzy search (mirroring anre.licente pattern).
--
-- Filename signals tip_serviciu:
-- FURNIZORI-SPITALE-IN-CONTRACT-2024.pdf → tip='spital'
-- FURNIZORI-IN-CONTRACT-MEDICINA-DE-FAMILIE.. → tip='medicina_familie'
-- FURNIZORI-DE-SERVICII-FARMACEUTICE-.. → tip='farmacie'
-- FURNIZORI-DISPOZITIVE-MEDICALE-.. → tip='dispozitive_medicale'
-- FURNIZORI-MEDICINA-DENTARA-.. → tip='medicina_dentara'
-- FURNIZORI-INGRIJIRI-MEDICALE-.. → tip='ingrijiri_medicale'
-- FURNIZORI-INGRIJIRI-PALIATIVE-.. → tip='ingrijiri_paliative'
-- FURNIZORI-RECUPERARE-MEDICALA-.. → tip='recuperare_medicala'
-- FURNIZORI-PNS-.. → tip='pns' (programe nationale)
-- FURNIZORI-IN-CONTRACT-AMBULATORIU-CLINIC-.. → tip='ambulatoriu_clinic'
-- FURNIZORI-IN-CONTRACT-AMBULATORIU-PARACLIN.. → tip='paraclinic'
-- FURNIZORI-..-URGENTA-PRESPITALICEASCA-.. → tip='urgenta_transport'
-- Lista-furnizori-clinice-.. → tip='clinic'
-- Lista-furnizori-RECA-.. → tip='recuperare_a'
-- Lista-furnizori-radioterapie-.. → tip='radioterapie'
-- Lista-furnizori-testare-genetica-.. → tip='testare_genetica'
-- Lista-furnizori-tumori-solide-maligne-.. → tip='oncologie'
--
-- Județ extraction is heuristic: from filename (e.g. CAS-GORJ, CAS-ARAD) OR
-- from PDF page header ("CASA DE ASIGURARI A JUDETULUI GORJ"). When both fail,
-- it's a national list (rare — most centrally-uploaded files are actually
-- per-județ, as the PDFs are produced by individual CAS-uri).
--
-- ───────────────────────────────────────────────────────────────────────────
-- Cross-source value:
-- ───────────────────────────────────────────────────────────────────────────
-- 1. cnas.furnizori.cui (resolved post-ingest) × seap.announcements.supplier_cui
-- @ CPV 33.* (medical equipment) / 85.* (medical services) =
-- "medical providers winning state contracts directly + via insurance"
-- 2. cnas.furnizori.cui × anaf_datornici.datornic.cui = "spitale & clinici cu
-- datorii la stat" — red-flag pattern.
-- 3. cnas.furnizori.cui × fonduri.proiect.beneficiar_cui (POIM-Sănătate) =
-- EU-funded healthcare providers.
CREATE SCHEMA IF NOT EXISTS cnas;
-- ── 1. Documents (file metadata catalog) ───────────────────────────────────
-- Tracks every PDF/XLSX harvested from cnas.ro WP media. One row per file URL.
-- Idempotent re-fetch: same URL → UPDATE fetched_at + parsed_at.
CREATE TABLE IF NOT EXISTS cnas.documents (
id bigserial PRIMARY KEY,
source_url text NOT NULL UNIQUE,
source text NOT NULL, -- 'wp-media' | 'cas-cnas-spa' | 'wayback' | 'manual'
wp_media_id bigint, -- /wp-json/wp/v2/media id (when source='wp-media')
title text,
filename text,
mime_type text,
file_size_bytes integer,
file_sha1 char(40), -- of downloaded bytes (used to detect changes)
published_at timestamptz, -- WP "date" (publication on CNAS site)
-- Inferred classification (heuristic from filename + content):
tip_serviciu text, -- 'spital' / 'farmacie' / 'medicina_familie' / etc. (see header above)
judet text, -- "GORJ" / "BIHOR" / NULL when national or unknown
perioada text, -- raw period from filename ("01.07.2024", "decembrie-2024")
page_count integer, -- for PDFs, post-parse
parse_status text NOT NULL DEFAULT 'pending',-- 'pending' / 'ok' / 'failed' / 'unsupported_format' / 'no_table'
parse_error text,
rows_extracted integer NOT NULL DEFAULT 0,
fetched_at timestamptz NOT NULL DEFAULT now(),
parsed_at timestamptz
);
CREATE INDEX IF NOT EXISTS idx_cnas_documents_tip ON cnas.documents(tip_serviciu);
CREATE INDEX IF NOT EXISTS idx_cnas_documents_judet ON cnas.documents(judet);
CREATE INDEX IF NOT EXISTS idx_cnas_documents_published ON cnas.documents(published_at DESC);
CREATE INDEX IF NOT EXISTS idx_cnas_documents_parse_status ON cnas.documents(parse_status);
COMMENT ON TABLE cnas.documents IS
'Catalog of every CNAS provider-list document (PDF/XLSX) harvested. One row per source URL. Source: cnas.ro/wp-json/wp/v2/media + future SPA endpoints.';
COMMENT ON COLUMN cnas.documents.tip_serviciu IS
'Service category inferred from filename: spital, farmacie, medicina_familie, medicina_dentara, dispozitive_medicale, ambulatoriu_clinic, paraclinic, ingrijiri_medicale, ingrijiri_paliative, recuperare_medicala, urgenta_transport, pns, clinic, oncologie, testare_genetica, radioterapie, other.';
-- ── 2. Furnizori (extracted provider records) ──────────────────────────────
-- One row per (document, NR_CRT). Provider name is the fundamental key — CUI
-- is resolved POST-INSERT via firms.normalize_company_name fuzzy match (mirror
-- of anre.licente pattern).
--
-- We allow the same legal entity (same CUI) to appear MULTIPLE times across
-- documents (e.g. same hospital listed in spitale + paraclinic + clinic lists).
-- Dedup is via mv_cnas_per_cui rollup, not at insert time.
CREATE TABLE IF NOT EXISTS cnas.furnizori (
id bigserial PRIMARY KEY,
document_id bigint NOT NULL REFERENCES cnas.documents(id) ON DELETE CASCADE,
-- Document context (denormalized for fast filtering):
tip_serviciu text, -- inherited from document
judet text, -- inherited from document (or row-level when available)
perioada text, -- "la 01.07.2024" etc.
-- Source row data:
nr_crt integer, -- in-document index (1..N)
cod_furnizor text, -- "BH01" / "CT12" — CAS-internal supplier code (when present)
name text NOT NULL, -- raw from document ("Spitalul Clinic Județean Oradea", "DR.HEIM HERMINA")
name_norm text, -- firms.normalize_company_name — populated post-insert
reprezentant text, -- legal rep / cabinet doctor (for medicina familie, dentara)
sediu text, -- adresa
localitate text,
telefon text,
fax text,
email text,
specialitate text, -- pentru ambulatoriu, paraclinic, clinic
-- CUI matching (resolved post-ingest):
cui text,
cui_match_score numeric(4,3),
cui_match_method text, -- 'exact_norm' / 'trgm_unique' / 'trgm_judet' / 'manual'
matched_at timestamptz,
raw_text text, -- the raw text-row from PDF for debugging
fetched_at timestamptz NOT NULL DEFAULT now(),
UNIQUE (document_id, nr_crt, name)
);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_cui ON cnas.furnizori(cui) WHERE cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnas_furn_judet_tip ON cnas.furnizori(judet, tip_serviciu);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_name_trgm ON cnas.furnizori USING gin (name_norm gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_document ON cnas.furnizori(document_id);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_localitate ON cnas.furnizori(localitate) WHERE localitate IS NOT NULL;
COMMENT ON TABLE cnas.furnizori IS
'Furnizori de servicii medicale extrași din PDF-urile CNAS. One row per (document, nr_crt, name). CUI resolved post-insert via firms.normalize_company_name + trgm fuzzy.';
COMMENT ON COLUMN cnas.furnizori.cod_furnizor IS
'Internal CAS supplier code, e.g. "BH01" (Bihor seq 01), "MB427" (București-Ilfov seq 427). NOT a CUI.';
COMMENT ON COLUMN cnas.furnizori.cui IS
'Resolved CUI via firms.normalize_company_name + pg_trgm match. NULL = unmatched; cabinete medicale individuale (CMI) often have no CUI in firms registry.';
-- ── 3. Scrape log ─────────────────────────────────────────────────────────
-- Mirrors anre.scrape_log convention.
CREATE TABLE IF NOT EXISTS cnas.scrape_log (
id bigserial PRIMARY KEY,
scraper text NOT NULL, -- 'wp-media-list' / 'parse-pdf' / 'match-cui' / 'cas-cnas-spa'
source_url text,
rows_seen integer NOT NULL DEFAULT 0,
rows_inserted integer NOT NULL DEFAULT 0,
rows_updated integer NOT NULL DEFAULT 0,
rows_skipped integer NOT NULL DEFAULT 0,
duration_ms integer NOT NULL DEFAULT 0,
started_at timestamptz NOT NULL,
finished_at timestamptz NOT NULL DEFAULT now(),
error text
);
CREATE INDEX IF NOT EXISTS idx_cnas_scrape_log_started ON cnas.scrape_log(started_at DESC);
-- ── 4. Materialized view: per-CUI provider rollup ─────────────────────────
-- Joinable with seap.announcements + anaf_datornici + fonduri.proiect for
-- cross-source detection. Refreshed via cron after match-cui pass.
CREATE MATERIALIZED VIEW IF NOT EXISTS cnas.mv_furnizori_per_cui AS
SELECT
cui,
COUNT(*) AS nr_aparitii, -- across all lists
COUNT(DISTINCT tip_serviciu) FILTER (WHERE tip_serviciu IS NOT NULL) AS nr_tipuri_serviciu,
COUNT(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS nr_judete,
array_agg(DISTINCT tip_serviciu) FILTER (WHERE tip_serviciu IS NOT NULL) AS tipuri_serviciu,
array_agg(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS judete,
-- One representative name (most common):
(array_agg(name ORDER BY length(name)))[1] AS name_sample,
MIN(fetched_at) AS first_seen,
MAX(fetched_at) AS last_seen
FROM cnas.furnizori
WHERE cui IS NOT NULL
GROUP BY cui;
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnas_mv_per_cui ON cnas.mv_furnizori_per_cui(cui);
COMMENT ON MATERIALIZED VIEW cnas.mv_furnizori_per_cui IS
'Per-CUI rollup of CNAS provider appearances. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnas.mv_furnizori_per_cui;';