a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
203 lines
12 KiB
SQL
203 lines
12 KiB
SQL
-- 033_cnsc.sql
|
||
-- CNSC — Consiliul Național de Soluționare a Contestațiilor.
|
||
-- Independent administrative-jurisdictional body that resolves
|
||
-- contestations against SEAP procurement procedures (Law 101/2016).
|
||
--
|
||
-- Source investigated 2026-05-10:
|
||
-- http://portal.cnsc.ro/decizii.html
|
||
-- → ASP.NET WebForms front, but data is loaded async via
|
||
-- POST http://portal.cnsc.ro/Default.aspx/CallWebMethod
|
||
-- payload: {"sender":"67fb8141-f456-4276-b51a-baca731f92ca",
|
||
-- "methodName":"get",
|
||
-- "senderParams":"a=search®:registrationDate=-&page=N",
|
||
-- "isBuletin":"0"}
|
||
-- Header: Referer must match the page query — server reads page from it.
|
||
-- Cookies: ASP.NET_SessionId required; obtained by GET /decizii.html first.
|
||
-- → Returns JSON {"d":"<html>...</html>"} with a <table> inside.
|
||
-- → 50 rows per page × 617 pages ≈ 30,850 decisions.
|
||
-- → Page 1 has 2026 decisions; last page (617) lands in 2024.
|
||
-- Sort order is by decision number DESC (panel-grouped).
|
||
--
|
||
-- Column layout in the listing (already structured — no PDF parse needed
|
||
-- to get 80% of the value):
|
||
-- 1. Numar decizie → decision_no
|
||
-- 2. Nume Contestator → contestator_name (sometimes multiple)
|
||
-- 3. Nr. Inregistrare CNSC → registration_no_cnsc
|
||
-- 4. Denumire Autoritate → authority_name
|
||
-- 5. CUI Contestator → contestator_cui ← linkable to firms.entities
|
||
-- 6. CUI Autoritate Contractantă → authority_cui ← linkable to seap.announcements.authority_cui
|
||
-- 7. An → year
|
||
-- 8. Dată Inregistrare → registration_date
|
||
-- 9. Download → pdf_docuid (b64 docUID for sivadoc/download.aspx)
|
||
--
|
||
-- IMPORTANT: the listing does NOT include the SEAP procedure_ref (CN######) —
|
||
-- that lives only inside the PDF text. Stage 2 (PDF text extraction with
|
||
-- pdftotext + regex for "CN[0-9]{6,}|SCN[0-9]+|ADV[0-9]+" can recover the
|
||
-- SEAP ref for ~80% of decisions; estimate 15-25h to design+QA the parser
|
||
-- across the full 30K corpus.
|
||
--
|
||
-- Decision_type is also PDF-only: the listing shows when the contestation
|
||
-- was REGISTERED, not the outcome. PDF parsing is required to recover
|
||
-- 'admis' / 'respins' / 'admis în parte' / 'redirecționat' / 'arhivat'.
|
||
--
|
||
-- Cross-source value (live, even at Stage 1):
|
||
-- cnsc.decizii.authority_cui × seap.announcements.authority_cui
|
||
-- = "Authorities with most contestations filed against them" (procedural risk score)
|
||
-- cnsc.decizii.contestator_cui × seap.announcements.supplier_cui
|
||
-- = "Suppliers most active in contesting losses" (litigious-bidder profile)
|
||
-- cnsc.decizii.contestator_cui × firms.entities + financials
|
||
-- = "Who challenges the most? Are they real bidders or vexatious filers?"
|
||
--
|
||
-- After Stage 2 PDF parse:
|
||
-- cnsc.decizii × seap.announcements ON seap_procedure_ref = ref_number
|
||
-- = full contestation lifecycle: tender → contestation → CNSC outcome → award
|
||
-- GROUP BY authority_cui WHERE decision_type='admis' / total
|
||
-- = "Authorities most likely to lose at CNSC" — strong signal of vicious
|
||
-- procedure design. THIS IS THE KILLER QUERY.
|
||
|
||
CREATE SCHEMA IF NOT EXISTS cnsc;
|
||
|
||
-- ── 1. Decizii — one row per CNSC decision ─────────────────────────────────
|
||
-- PK = (decision_no, year). The CNSC numbering resets every year, so
|
||
-- (1234, 2024) ≠ (1234, 2025). docUID is unique-by-doc but not all rows
|
||
-- have one in old data, so we don't make it the PK.
|
||
CREATE TABLE IF NOT EXISTS cnsc.decizii (
|
||
id bigserial PRIMARY KEY,
|
||
decision_no integer NOT NULL, -- 1445
|
||
decision_year smallint NOT NULL, -- 2026
|
||
registration_no_cnsc text, -- "10549, 20389, 20395" — comma-separated when multiple
|
||
registration_date date, -- 17.02.2026 → 2026-02-17
|
||
|
||
-- Contestator (the bidder who filed the complaint)
|
||
contestator_name_raw text, -- "RAC CONSTRUCT MORENI SRL , RAC CONSTRUCT MORENI SRL, URBIO DOWNSTREAM SRL"
|
||
contestator_names text[], -- split + trimmed
|
||
contestator_cui_raw text, -- "RO18035010" or "RO18035010;4663448" or empty
|
||
contestator_cuis text[], -- normalized digits only, duplicates collapsed
|
||
|
||
-- Autoritatea contractantă (the public buyer being contested)
|
||
authority_name text,
|
||
authority_cui_raw text, -- "4495140;" or "16054368" or empty
|
||
authority_cuis text[], -- normalized digits only, duplicates collapsed
|
||
|
||
-- PDF reference (download URL is built from docuid_b64 + filename_b64)
|
||
pdf_filename text, -- "Decizie_1445.pdf"
|
||
pdf_docuid_b64 text, -- "Mzg4NThkZGQtY2JkMS00ZDg3LTlhY2UtY2ZlMTBlYzAwM2Y0"
|
||
pdf_url text, -- materialized: full http://portal.cnsc.ro/sivadoc/download.aspx?...
|
||
|
||
-- Stage-2 (PDF parse) fields — NULL until pdftotext+regex pass runs
|
||
-- Mostly populated post-hoc; kept here so the schema stays single-table.
|
||
seap_procedure_ref text, -- "CN1234567" / "ADV……" / "SCN……" — joinable to seap.announcements.ref_number
|
||
decision_type text, -- 'admis' | 'admis_in_parte' | 'respins' | 'redirectionat' | 'arhivat' | 'fond' | NULL
|
||
decision_date date, -- date the decision was issued (different from registration_date)
|
||
decision_summary text, -- short extracted summary
|
||
pdf_text_sha1 text, -- sha1 of pdftotext output → idempotent re-parse
|
||
pdf_parsed_at timestamptz,
|
||
|
||
-- Provenance
|
||
source_page integer, -- which listing page we found this on (debug)
|
||
fetched_at timestamptz NOT NULL DEFAULT now(),
|
||
|
||
CONSTRAINT cnsc_decizii_pk_natural UNIQUE (decision_no, decision_year)
|
||
);
|
||
|
||
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_authority_cuis
|
||
ON cnsc.decizii USING gin (authority_cuis);
|
||
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_contestator_cuis
|
||
ON cnsc.decizii USING gin (contestator_cuis);
|
||
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_seap_ref
|
||
ON cnsc.decizii (seap_procedure_ref) WHERE seap_procedure_ref IS NOT NULL;
|
||
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_decision_type
|
||
ON cnsc.decizii (decision_type) WHERE decision_type IS NOT NULL;
|
||
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_year
|
||
ON cnsc.decizii (decision_year DESC);
|
||
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_reg_date
|
||
ON cnsc.decizii (registration_date DESC NULLS LAST);
|
||
|
||
COMMENT ON TABLE cnsc.decizii IS
|
||
'Decizii CNSC — contestații pe proceduri SEAP. PK natural (decision_no, decision_year). '
|
||
'Stage 1: scrape listing din portal.cnsc.ro/decizii.html (50/page × 617 pages ≈ 30K rows). '
|
||
'Stage 2 (TODO): pdftotext pe PDF-ul referit → seap_procedure_ref + decision_type.';
|
||
COMMENT ON COLUMN cnsc.decizii.decision_no IS
|
||
'Numărul deciziei CNSC. Resetat anual — întotdeauna unic doar împreună cu decision_year.';
|
||
COMMENT ON COLUMN cnsc.decizii.registration_no_cnsc IS
|
||
'Numărul/numerele de înregistrare a contestației la CNSC. Poate fi listă comma-separată '
|
||
'când o decizie soluționează mai multe contestații (ex. "10549, 20389, 20395").';
|
||
COMMENT ON COLUMN cnsc.decizii.contestator_cuis IS
|
||
'CUIs cifre-only ale contestatorilor (mai mulți când o asociere atacă). '
|
||
'Joinabil cu firms.entities.cui sau seap.announcements.supplier_cui.';
|
||
COMMENT ON COLUMN cnsc.decizii.authority_cuis IS
|
||
'CUIs cifre-only ale autorităților contractante. Joinabil cu seap.announcements.authority_cui.';
|
||
COMMENT ON COLUMN cnsc.decizii.pdf_url IS
|
||
'URL complet sivadoc/download.aspx?docUID=…&filename=…&action=inline (b64 in query).';
|
||
COMMENT ON COLUMN cnsc.decizii.seap_procedure_ref IS
|
||
'Referința procedurii SEAP extrasă din PDF (CN/SCN/ADV/RFQ + cifre). NULL până la Stage-2 PDF-parse.';
|
||
COMMENT ON COLUMN cnsc.decizii.decision_type IS
|
||
'Tipul deciziei extras din textul PDF: admis | admis_in_parte | respins | redirectionat | arhivat | fond. NULL până la Stage-2.';
|
||
|
||
-- ── 2. Scrape log (mirrors anre/aaas convention) ──────────────────────────
|
||
CREATE TABLE IF NOT EXISTS cnsc.scrape_log (
|
||
id bigserial PRIMARY KEY,
|
||
scraper text NOT NULL, -- 'listing' | 'pdf_parse'
|
||
page_from integer,
|
||
page_to integer,
|
||
rows_seen integer NOT NULL DEFAULT 0,
|
||
rows_inserted integer NOT NULL DEFAULT 0,
|
||
rows_updated integer NOT NULL DEFAULT 0,
|
||
rows_skipped integer NOT NULL DEFAULT 0,
|
||
duration_ms integer NOT NULL DEFAULT 0,
|
||
started_at timestamptz NOT NULL,
|
||
finished_at timestamptz NOT NULL DEFAULT now(),
|
||
error text
|
||
);
|
||
|
||
CREATE INDEX IF NOT EXISTS idx_cnsc_scrape_log_started ON cnsc.scrape_log(started_at DESC);
|
||
|
||
-- ── 3. Materialized view: per-CUI rollup (authority side) ─────────────────
|
||
-- Used by the killer query "authorities most likely to lose at CNSC".
|
||
-- decision_type rollup is meaningful only after Stage-2 PDF parse is done;
|
||
-- until then admis_count / respins_count are 0 and contestation_count is
|
||
-- the useful field.
|
||
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
|
||
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_authority_cui AS
|
||
SELECT
|
||
authority_cui AS cui,
|
||
COUNT(*) AS contestation_count,
|
||
COUNT(*) FILTER (WHERE decision_type = 'admis') AS admis_count,
|
||
COUNT(*) FILTER (WHERE decision_type = 'admis_in_parte') AS admis_in_parte_count,
|
||
COUNT(*) FILTER (WHERE decision_type = 'respins') AS respins_count,
|
||
COUNT(*) FILTER (WHERE decision_type IS NOT NULL) AS resolved_count,
|
||
MIN(registration_date) AS first_contestation_date,
|
||
MAX(registration_date) AS last_contestation_date
|
||
FROM cnsc.decizii d, unnest(authority_cuis) AS authority_cui
|
||
WHERE authority_cui IS NOT NULL AND authority_cui <> ''
|
||
GROUP BY authority_cui;
|
||
|
||
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_authority_cui
|
||
ON cnsc.mv_per_authority_cui(cui);
|
||
|
||
COMMENT ON MATERIALIZED VIEW cnsc.mv_per_authority_cui IS
|
||
'Rollup CNSC per autoritate contractantă (CUI). Refresh: '
|
||
'REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui.';
|
||
|
||
-- ── 4. Materialized view: per-CUI rollup (contestator side) ───────────────
|
||
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_contestator_cui AS
|
||
SELECT
|
||
contestator_cui AS cui,
|
||
COUNT(*) AS contestations_filed,
|
||
COUNT(*) FILTER (WHERE decision_type = 'admis') AS won_admis,
|
||
COUNT(*) FILTER (WHERE decision_type = 'admis_in_parte') AS won_partial,
|
||
COUNT(*) FILTER (WHERE decision_type = 'respins') AS lost_respins,
|
||
COUNT(*) FILTER (WHERE decision_type IS NOT NULL) AS resolved_count,
|
||
MIN(registration_date) AS first_contestation_date,
|
||
MAX(registration_date) AS last_contestation_date
|
||
FROM cnsc.decizii d, unnest(contestator_cuis) AS contestator_cui
|
||
WHERE contestator_cui IS NOT NULL AND contestator_cui <> ''
|
||
GROUP BY contestator_cui;
|
||
|
||
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_contestator_cui
|
||
ON cnsc.mv_per_contestator_cui(cui);
|
||
|
||
COMMENT ON MATERIALIZED VIEW cnsc.mv_per_contestator_cui IS
|
||
'Rollup CNSC per contestator (CUI). Cine atacă cel mai mult, cu ce rată de succes. '
|
||
'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_contestator_cui.';
|