Files
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

203 lines
12 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- 033_cnsc.sql
-- CNSC — Consiliul Național de Soluționare a Contestațiilor.
-- Independent administrative-jurisdictional body that resolves
-- contestations against SEAP procurement procedures (Law 101/2016).
--
-- Source investigated 2026-05-10:
-- http://portal.cnsc.ro/decizii.html
-- → ASP.NET WebForms front, but data is loaded async via
-- POST http://portal.cnsc.ro/Default.aspx/CallWebMethod
-- payload: {"sender":"67fb8141-f456-4276-b51a-baca731f92ca",
-- "methodName":"get",
-- "senderParams":"a=search&reg:registrationDate=-&page=N",
-- "isBuletin":"0"}
-- Header: Referer must match the page query — server reads page from it.
-- Cookies: ASP.NET_SessionId required; obtained by GET /decizii.html first.
-- → Returns JSON {"d":"<html>...</html>"} with a <table> inside.
-- → 50 rows per page × 617 pages ≈ 30,850 decisions.
-- → Page 1 has 2026 decisions; last page (617) lands in 2024.
-- Sort order is by decision number DESC (panel-grouped).
--
-- Column layout in the listing (already structured — no PDF parse needed
-- to get 80% of the value):
-- 1. Numar decizie → decision_no
-- 2. Nume Contestator → contestator_name (sometimes multiple)
-- 3. Nr. Inregistrare CNSC → registration_no_cnsc
-- 4. Denumire Autoritate → authority_name
-- 5. CUI Contestator → contestator_cui ← linkable to firms.entities
-- 6. CUI Autoritate Contractantă → authority_cui ← linkable to seap.announcements.authority_cui
-- 7. An → year
-- 8. Dată Inregistrare → registration_date
-- 9. Download → pdf_docuid (b64 docUID for sivadoc/download.aspx)
--
-- IMPORTANT: the listing does NOT include the SEAP procedure_ref (CN######) —
-- that lives only inside the PDF text. Stage 2 (PDF text extraction with
-- pdftotext + regex for "CN[0-9]{6,}|SCN[0-9]+|ADV[0-9]+" can recover the
-- SEAP ref for ~80% of decisions; estimate 15-25h to design+QA the parser
-- across the full 30K corpus.
--
-- Decision_type is also PDF-only: the listing shows when the contestation
-- was REGISTERED, not the outcome. PDF parsing is required to recover
-- 'admis' / 'respins' / 'admis în parte' / 'redirecționat' / 'arhivat'.
--
-- Cross-source value (live, even at Stage 1):
-- cnsc.decizii.authority_cui × seap.announcements.authority_cui
-- = "Authorities with most contestations filed against them" (procedural risk score)
-- cnsc.decizii.contestator_cui × seap.announcements.supplier_cui
-- = "Suppliers most active in contesting losses" (litigious-bidder profile)
-- cnsc.decizii.contestator_cui × firms.entities + financials
-- = "Who challenges the most? Are they real bidders or vexatious filers?"
--
-- After Stage 2 PDF parse:
-- cnsc.decizii × seap.announcements ON seap_procedure_ref = ref_number
-- = full contestation lifecycle: tender → contestation → CNSC outcome → award
-- GROUP BY authority_cui WHERE decision_type='admis' / total
-- = "Authorities most likely to lose at CNSC" — strong signal of vicious
-- procedure design. THIS IS THE KILLER QUERY.
CREATE SCHEMA IF NOT EXISTS cnsc;
-- ── 1. Decizii — one row per CNSC decision ─────────────────────────────────
-- PK = (decision_no, year). The CNSC numbering resets every year, so
-- (1234, 2024) ≠ (1234, 2025). docUID is unique-by-doc but not all rows
-- have one in old data, so we don't make it the PK.
CREATE TABLE IF NOT EXISTS cnsc.decizii (
id bigserial PRIMARY KEY,
decision_no integer NOT NULL, -- 1445
decision_year smallint NOT NULL, -- 2026
registration_no_cnsc text, -- "10549, 20389, 20395" — comma-separated when multiple
registration_date date, -- 17.02.2026 → 2026-02-17
-- Contestator (the bidder who filed the complaint)
contestator_name_raw text, -- "RAC CONSTRUCT MORENI SRL , RAC CONSTRUCT MORENI SRL, URBIO DOWNSTREAM SRL"
contestator_names text[], -- split + trimmed
contestator_cui_raw text, -- "RO18035010" or "RO18035010;4663448" or empty
contestator_cuis text[], -- normalized digits only, duplicates collapsed
-- Autoritatea contractantă (the public buyer being contested)
authority_name text,
authority_cui_raw text, -- "4495140;" or "16054368" or empty
authority_cuis text[], -- normalized digits only, duplicates collapsed
-- PDF reference (download URL is built from docuid_b64 + filename_b64)
pdf_filename text, -- "Decizie_1445.pdf"
pdf_docuid_b64 text, -- "Mzg4NThkZGQtY2JkMS00ZDg3LTlhY2UtY2ZlMTBlYzAwM2Y0"
pdf_url text, -- materialized: full http://portal.cnsc.ro/sivadoc/download.aspx?...
-- Stage-2 (PDF parse) fields — NULL until pdftotext+regex pass runs
-- Mostly populated post-hoc; kept here so the schema stays single-table.
seap_procedure_ref text, -- "CN1234567" / "ADV……" / "SCN……" — joinable to seap.announcements.ref_number
decision_type text, -- 'admis' | 'admis_in_parte' | 'respins' | 'redirectionat' | 'arhivat' | 'fond' | NULL
decision_date date, -- date the decision was issued (different from registration_date)
decision_summary text, -- short extracted summary
pdf_text_sha1 text, -- sha1 of pdftotext output → idempotent re-parse
pdf_parsed_at timestamptz,
-- Provenance
source_page integer, -- which listing page we found this on (debug)
fetched_at timestamptz NOT NULL DEFAULT now(),
CONSTRAINT cnsc_decizii_pk_natural UNIQUE (decision_no, decision_year)
);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_authority_cuis
ON cnsc.decizii USING gin (authority_cuis);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_contestator_cuis
ON cnsc.decizii USING gin (contestator_cuis);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_seap_ref
ON cnsc.decizii (seap_procedure_ref) WHERE seap_procedure_ref IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_decision_type
ON cnsc.decizii (decision_type) WHERE decision_type IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_year
ON cnsc.decizii (decision_year DESC);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_reg_date
ON cnsc.decizii (registration_date DESC NULLS LAST);
COMMENT ON TABLE cnsc.decizii IS
'Decizii CNSC — contestații pe proceduri SEAP. PK natural (decision_no, decision_year). '
'Stage 1: scrape listing din portal.cnsc.ro/decizii.html (50/page × 617 pages ≈ 30K rows). '
'Stage 2 (TODO): pdftotext pe PDF-ul referit → seap_procedure_ref + decision_type.';
COMMENT ON COLUMN cnsc.decizii.decision_no IS
'Numărul deciziei CNSC. Resetat anual — întotdeauna unic doar împreună cu decision_year.';
COMMENT ON COLUMN cnsc.decizii.registration_no_cnsc IS
'Numărul/numerele de înregistrare a contestației la CNSC. Poate fi listă comma-separată '
'când o decizie soluționează mai multe contestații (ex. "10549, 20389, 20395").';
COMMENT ON COLUMN cnsc.decizii.contestator_cuis IS
'CUIs cifre-only ale contestatorilor (mai mulți când o asociere atacă). '
'Joinabil cu firms.entities.cui sau seap.announcements.supplier_cui.';
COMMENT ON COLUMN cnsc.decizii.authority_cuis IS
'CUIs cifre-only ale autorităților contractante. Joinabil cu seap.announcements.authority_cui.';
COMMENT ON COLUMN cnsc.decizii.pdf_url IS
'URL complet sivadoc/download.aspx?docUID=…&filename=…&action=inline (b64 in query).';
COMMENT ON COLUMN cnsc.decizii.seap_procedure_ref IS
'Referința procedurii SEAP extrasă din PDF (CN/SCN/ADV/RFQ + cifre). NULL până la Stage-2 PDF-parse.';
COMMENT ON COLUMN cnsc.decizii.decision_type IS
'Tipul deciziei extras din textul PDF: admis | admis_in_parte | respins | redirectionat | arhivat | fond. NULL până la Stage-2.';
-- ── 2. Scrape log (mirrors anre/aaas convention) ──────────────────────────
CREATE TABLE IF NOT EXISTS cnsc.scrape_log (
id bigserial PRIMARY KEY,
scraper text NOT NULL, -- 'listing' | 'pdf_parse'
page_from integer,
page_to integer,
rows_seen integer NOT NULL DEFAULT 0,
rows_inserted integer NOT NULL DEFAULT 0,
rows_updated integer NOT NULL DEFAULT 0,
rows_skipped integer NOT NULL DEFAULT 0,
duration_ms integer NOT NULL DEFAULT 0,
started_at timestamptz NOT NULL,
finished_at timestamptz NOT NULL DEFAULT now(),
error text
);
CREATE INDEX IF NOT EXISTS idx_cnsc_scrape_log_started ON cnsc.scrape_log(started_at DESC);
-- ── 3. Materialized view: per-CUI rollup (authority side) ─────────────────
-- Used by the killer query "authorities most likely to lose at CNSC".
-- decision_type rollup is meaningful only after Stage-2 PDF parse is done;
-- until then admis_count / respins_count are 0 and contestation_count is
-- the useful field.
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_authority_cui AS
SELECT
authority_cui AS cui,
COUNT(*) AS contestation_count,
COUNT(*) FILTER (WHERE decision_type = 'admis') AS admis_count,
COUNT(*) FILTER (WHERE decision_type = 'admis_in_parte') AS admis_in_parte_count,
COUNT(*) FILTER (WHERE decision_type = 'respins') AS respins_count,
COUNT(*) FILTER (WHERE decision_type IS NOT NULL) AS resolved_count,
MIN(registration_date) AS first_contestation_date,
MAX(registration_date) AS last_contestation_date
FROM cnsc.decizii d, unnest(authority_cuis) AS authority_cui
WHERE authority_cui IS NOT NULL AND authority_cui <> ''
GROUP BY authority_cui;
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_authority_cui
ON cnsc.mv_per_authority_cui(cui);
COMMENT ON MATERIALIZED VIEW cnsc.mv_per_authority_cui IS
'Rollup CNSC per autoritate contractantă (CUI). Refresh: '
'REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui.';
-- ── 4. Materialized view: per-CUI rollup (contestator side) ───────────────
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_contestator_cui AS
SELECT
contestator_cui AS cui,
COUNT(*) AS contestations_filed,
COUNT(*) FILTER (WHERE decision_type = 'admis') AS won_admis,
COUNT(*) FILTER (WHERE decision_type = 'admis_in_parte') AS won_partial,
COUNT(*) FILTER (WHERE decision_type = 'respins') AS lost_respins,
COUNT(*) FILTER (WHERE decision_type IS NOT NULL) AS resolved_count,
MIN(registration_date) AS first_contestation_date,
MAX(registration_date) AS last_contestation_date
FROM cnsc.decizii d, unnest(contestator_cuis) AS contestator_cui
WHERE contestator_cui IS NOT NULL AND contestator_cui <> ''
GROUP BY contestator_cui;
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_contestator_cui
ON cnsc.mv_per_contestator_cui(cui);
COMMENT ON MATERIALIZED VIEW cnsc.mv_per_contestator_cui IS
'Rollup CNSC per contestator (CUI). Cine atacă cel mai mult, cu ce rată de succes. '
'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_contestator_cui.';