initial: split from gov-agreg — vreau.digital standalone platform

Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
Claude VM
2026-05-13 00:10:32 +03:00
commit a6c03a091e
352 changed files with 75295 additions and 0 deletions
+202
View File
@@ -0,0 +1,202 @@
-- 033_cnsc.sql
-- CNSC — Consiliul Național de Soluționare a Contestațiilor.
-- Independent administrative-jurisdictional body that resolves
-- contestations against SEAP procurement procedures (Law 101/2016).
--
-- Source investigated 2026-05-10:
-- http://portal.cnsc.ro/decizii.html
-- → ASP.NET WebForms front, but data is loaded async via
-- POST http://portal.cnsc.ro/Default.aspx/CallWebMethod
-- payload: {"sender":"67fb8141-f456-4276-b51a-baca731f92ca",
-- "methodName":"get",
-- "senderParams":"a=search&reg:registrationDate=-&page=N",
-- "isBuletin":"0"}
-- Header: Referer must match the page query — server reads page from it.
-- Cookies: ASP.NET_SessionId required; obtained by GET /decizii.html first.
-- → Returns JSON {"d":"<html>...</html>"} with a <table> inside.
-- → 50 rows per page × 617 pages ≈ 30,850 decisions.
-- → Page 1 has 2026 decisions; last page (617) lands in 2024.
-- Sort order is by decision number DESC (panel-grouped).
--
-- Column layout in the listing (already structured — no PDF parse needed
-- to get 80% of the value):
-- 1. Numar decizie → decision_no
-- 2. Nume Contestator → contestator_name (sometimes multiple)
-- 3. Nr. Inregistrare CNSC → registration_no_cnsc
-- 4. Denumire Autoritate → authority_name
-- 5. CUI Contestator → contestator_cui ← linkable to firms.entities
-- 6. CUI Autoritate Contractantă → authority_cui ← linkable to seap.announcements.authority_cui
-- 7. An → year
-- 8. Dată Inregistrare → registration_date
-- 9. Download → pdf_docuid (b64 docUID for sivadoc/download.aspx)
--
-- IMPORTANT: the listing does NOT include the SEAP procedure_ref (CN######) —
-- that lives only inside the PDF text. Stage 2 (PDF text extraction with
-- pdftotext + regex for "CN[0-9]{6,}|SCN[0-9]+|ADV[0-9]+" can recover the
-- SEAP ref for ~80% of decisions; estimate 15-25h to design+QA the parser
-- across the full 30K corpus.
--
-- Decision_type is also PDF-only: the listing shows when the contestation
-- was REGISTERED, not the outcome. PDF parsing is required to recover
-- 'admis' / 'respins' / 'admis în parte' / 'redirecționat' / 'arhivat'.
--
-- Cross-source value (live, even at Stage 1):
-- cnsc.decizii.authority_cui × seap.announcements.authority_cui
-- = "Authorities with most contestations filed against them" (procedural risk score)
-- cnsc.decizii.contestator_cui × seap.announcements.supplier_cui
-- = "Suppliers most active in contesting losses" (litigious-bidder profile)
-- cnsc.decizii.contestator_cui × firms.entities + financials
-- = "Who challenges the most? Are they real bidders or vexatious filers?"
--
-- After Stage 2 PDF parse:
-- cnsc.decizii × seap.announcements ON seap_procedure_ref = ref_number
-- = full contestation lifecycle: tender → contestation → CNSC outcome → award
-- GROUP BY authority_cui WHERE decision_type='admis' / total
-- = "Authorities most likely to lose at CNSC" — strong signal of vicious
-- procedure design. THIS IS THE KILLER QUERY.
CREATE SCHEMA IF NOT EXISTS cnsc;
-- ── 1. Decizii — one row per CNSC decision ─────────────────────────────────
-- PK = (decision_no, year). The CNSC numbering resets every year, so
-- (1234, 2024) ≠ (1234, 2025). docUID is unique-by-doc but not all rows
-- have one in old data, so we don't make it the PK.
CREATE TABLE IF NOT EXISTS cnsc.decizii (
id bigserial PRIMARY KEY,
decision_no integer NOT NULL, -- 1445
decision_year smallint NOT NULL, -- 2026
registration_no_cnsc text, -- "10549, 20389, 20395" — comma-separated when multiple
registration_date date, -- 17.02.2026 → 2026-02-17
-- Contestator (the bidder who filed the complaint)
contestator_name_raw text, -- "RAC CONSTRUCT MORENI SRL , RAC CONSTRUCT MORENI SRL, URBIO DOWNSTREAM SRL"
contestator_names text[], -- split + trimmed
contestator_cui_raw text, -- "RO18035010" or "RO18035010;4663448" or empty
contestator_cuis text[], -- normalized digits only, duplicates collapsed
-- Autoritatea contractantă (the public buyer being contested)
authority_name text,
authority_cui_raw text, -- "4495140;" or "16054368" or empty
authority_cuis text[], -- normalized digits only, duplicates collapsed
-- PDF reference (download URL is built from docuid_b64 + filename_b64)
pdf_filename text, -- "Decizie_1445.pdf"
pdf_docuid_b64 text, -- "Mzg4NThkZGQtY2JkMS00ZDg3LTlhY2UtY2ZlMTBlYzAwM2Y0"
pdf_url text, -- materialized: full http://portal.cnsc.ro/sivadoc/download.aspx?...
-- Stage-2 (PDF parse) fields — NULL until pdftotext+regex pass runs
-- Mostly populated post-hoc; kept here so the schema stays single-table.
seap_procedure_ref text, -- "CN1234567" / "ADV……" / "SCN……" — joinable to seap.announcements.ref_number
decision_type text, -- 'admis' | 'admis_in_parte' | 'respins' | 'redirectionat' | 'arhivat' | 'fond' | NULL
decision_date date, -- date the decision was issued (different from registration_date)
decision_summary text, -- short extracted summary
pdf_text_sha1 text, -- sha1 of pdftotext output → idempotent re-parse
pdf_parsed_at timestamptz,
-- Provenance
source_page integer, -- which listing page we found this on (debug)
fetched_at timestamptz NOT NULL DEFAULT now(),
CONSTRAINT cnsc_decizii_pk_natural UNIQUE (decision_no, decision_year)
);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_authority_cuis
ON cnsc.decizii USING gin (authority_cuis);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_contestator_cuis
ON cnsc.decizii USING gin (contestator_cuis);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_seap_ref
ON cnsc.decizii (seap_procedure_ref) WHERE seap_procedure_ref IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_decision_type
ON cnsc.decizii (decision_type) WHERE decision_type IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_year
ON cnsc.decizii (decision_year DESC);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_reg_date
ON cnsc.decizii (registration_date DESC NULLS LAST);
COMMENT ON TABLE cnsc.decizii IS
'Decizii CNSC — contestații pe proceduri SEAP. PK natural (decision_no, decision_year). '
'Stage 1: scrape listing din portal.cnsc.ro/decizii.html (50/page × 617 pages ≈ 30K rows). '
'Stage 2 (TODO): pdftotext pe PDF-ul referit → seap_procedure_ref + decision_type.';
COMMENT ON COLUMN cnsc.decizii.decision_no IS
'Numărul deciziei CNSC. Resetat anual — întotdeauna unic doar împreună cu decision_year.';
COMMENT ON COLUMN cnsc.decizii.registration_no_cnsc IS
'Numărul/numerele de înregistrare a contestației la CNSC. Poate fi listă comma-separată '
'când o decizie soluționează mai multe contestații (ex. "10549, 20389, 20395").';
COMMENT ON COLUMN cnsc.decizii.contestator_cuis IS
'CUIs cifre-only ale contestatorilor (mai mulți când o asociere atacă). '
'Joinabil cu firms.entities.cui sau seap.announcements.supplier_cui.';
COMMENT ON COLUMN cnsc.decizii.authority_cuis IS
'CUIs cifre-only ale autorităților contractante. Joinabil cu seap.announcements.authority_cui.';
COMMENT ON COLUMN cnsc.decizii.pdf_url IS
'URL complet sivadoc/download.aspx?docUID=…&filename=…&action=inline (b64 in query).';
COMMENT ON COLUMN cnsc.decizii.seap_procedure_ref IS
'Referința procedurii SEAP extrasă din PDF (CN/SCN/ADV/RFQ + cifre). NULL până la Stage-2 PDF-parse.';
COMMENT ON COLUMN cnsc.decizii.decision_type IS
'Tipul deciziei extras din textul PDF: admis | admis_in_parte | respins | redirectionat | arhivat | fond. NULL până la Stage-2.';
-- ── 2. Scrape log (mirrors anre/aaas convention) ──────────────────────────
CREATE TABLE IF NOT EXISTS cnsc.scrape_log (
id bigserial PRIMARY KEY,
scraper text NOT NULL, -- 'listing' | 'pdf_parse'
page_from integer,
page_to integer,
rows_seen integer NOT NULL DEFAULT 0,
rows_inserted integer NOT NULL DEFAULT 0,
rows_updated integer NOT NULL DEFAULT 0,
rows_skipped integer NOT NULL DEFAULT 0,
duration_ms integer NOT NULL DEFAULT 0,
started_at timestamptz NOT NULL,
finished_at timestamptz NOT NULL DEFAULT now(),
error text
);
CREATE INDEX IF NOT EXISTS idx_cnsc_scrape_log_started ON cnsc.scrape_log(started_at DESC);
-- ── 3. Materialized view: per-CUI rollup (authority side) ─────────────────
-- Used by the killer query "authorities most likely to lose at CNSC".
-- decision_type rollup is meaningful only after Stage-2 PDF parse is done;
-- until then admis_count / respins_count are 0 and contestation_count is
-- the useful field.
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_authority_cui AS
SELECT
authority_cui AS cui,
COUNT(*) AS contestation_count,
COUNT(*) FILTER (WHERE decision_type = 'admis') AS admis_count,
COUNT(*) FILTER (WHERE decision_type = 'admis_in_parte') AS admis_in_parte_count,
COUNT(*) FILTER (WHERE decision_type = 'respins') AS respins_count,
COUNT(*) FILTER (WHERE decision_type IS NOT NULL) AS resolved_count,
MIN(registration_date) AS first_contestation_date,
MAX(registration_date) AS last_contestation_date
FROM cnsc.decizii d, unnest(authority_cuis) AS authority_cui
WHERE authority_cui IS NOT NULL AND authority_cui <> ''
GROUP BY authority_cui;
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_authority_cui
ON cnsc.mv_per_authority_cui(cui);
COMMENT ON MATERIALIZED VIEW cnsc.mv_per_authority_cui IS
'Rollup CNSC per autoritate contractantă (CUI). Refresh: '
'REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui.';
-- ── 4. Materialized view: per-CUI rollup (contestator side) ───────────────
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_contestator_cui AS
SELECT
contestator_cui AS cui,
COUNT(*) AS contestations_filed,
COUNT(*) FILTER (WHERE decision_type = 'admis') AS won_admis,
COUNT(*) FILTER (WHERE decision_type = 'admis_in_parte') AS won_partial,
COUNT(*) FILTER (WHERE decision_type = 'respins') AS lost_respins,
COUNT(*) FILTER (WHERE decision_type IS NOT NULL) AS resolved_count,
MIN(registration_date) AS first_contestation_date,
MAX(registration_date) AS last_contestation_date
FROM cnsc.decizii d, unnest(contestator_cuis) AS contestator_cui
WHERE contestator_cui IS NOT NULL AND contestator_cui <> ''
GROUP BY contestator_cui;
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_contestator_cui
ON cnsc.mv_per_contestator_cui(cui);
COMMENT ON MATERIALIZED VIEW cnsc.mv_per_contestator_cui IS
'Rollup CNSC per contestator (CUI). Cine atacă cel mai mult, cu ce rată de succes. '
'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_contestator_cui.';