-- 033_cnsc.sql -- CNSC — Consiliul Național de Soluționare a Contestațiilor. -- Independent administrative-jurisdictional body that resolves -- contestations against SEAP procurement procedures (Law 101/2016). -- -- Source investigated 2026-05-10: -- http://portal.cnsc.ro/decizii.html -- → ASP.NET WebForms front, but data is loaded async via -- POST http://portal.cnsc.ro/Default.aspx/CallWebMethod -- payload: {"sender":"67fb8141-f456-4276-b51a-baca731f92ca", -- "methodName":"get", -- "senderParams":"a=search®:registrationDate=-&page=N", -- "isBuletin":"0"} -- Header: Referer must match the page query — server reads page from it. -- Cookies: ASP.NET_SessionId required; obtained by GET /decizii.html first. -- → Returns JSON {"d":"..."} with a inside. -- → 50 rows per page × 617 pages ≈ 30,850 decisions. -- → Page 1 has 2026 decisions; last page (617) lands in 2024. -- Sort order is by decision number DESC (panel-grouped). -- -- Column layout in the listing (already structured — no PDF parse needed -- to get 80% of the value): -- 1. Numar decizie → decision_no -- 2. Nume Contestator → contestator_name (sometimes multiple) -- 3. Nr. Inregistrare CNSC → registration_no_cnsc -- 4. Denumire Autoritate → authority_name -- 5. CUI Contestator → contestator_cui ← linkable to firms.entities -- 6. CUI Autoritate Contractantă → authority_cui ← linkable to seap.announcements.authority_cui -- 7. An → year -- 8. Dată Inregistrare → registration_date -- 9. Download → pdf_docuid (b64 docUID for sivadoc/download.aspx) -- -- IMPORTANT: the listing does NOT include the SEAP procedure_ref (CN######) — -- that lives only inside the PDF text. Stage 2 (PDF text extraction with -- pdftotext + regex for "CN[0-9]{6,}|SCN[0-9]+|ADV[0-9]+" can recover the -- SEAP ref for ~80% of decisions; estimate 15-25h to design+QA the parser -- across the full 30K corpus. -- -- Decision_type is also PDF-only: the listing shows when the contestation -- was REGISTERED, not the outcome. PDF parsing is required to recover -- 'admis' / 'respins' / 'admis în parte' / 'redirecționat' / 'arhivat'. -- -- Cross-source value (live, even at Stage 1): -- cnsc.decizii.authority_cui × seap.announcements.authority_cui -- = "Authorities with most contestations filed against them" (procedural risk score) -- cnsc.decizii.contestator_cui × seap.announcements.supplier_cui -- = "Suppliers most active in contesting losses" (litigious-bidder profile) -- cnsc.decizii.contestator_cui × firms.entities + financials -- = "Who challenges the most? Are they real bidders or vexatious filers?" -- -- After Stage 2 PDF parse: -- cnsc.decizii × seap.announcements ON seap_procedure_ref = ref_number -- = full contestation lifecycle: tender → contestation → CNSC outcome → award -- GROUP BY authority_cui WHERE decision_type='admis' / total -- = "Authorities most likely to lose at CNSC" — strong signal of vicious -- procedure design. THIS IS THE KILLER QUERY. CREATE SCHEMA IF NOT EXISTS cnsc; -- ── 1. Decizii — one row per CNSC decision ───────────────────────────────── -- PK = (decision_no, year). The CNSC numbering resets every year, so -- (1234, 2024) ≠ (1234, 2025). docUID is unique-by-doc but not all rows -- have one in old data, so we don't make it the PK. CREATE TABLE IF NOT EXISTS cnsc.decizii ( id bigserial PRIMARY KEY, decision_no integer NOT NULL, -- 1445 decision_year smallint NOT NULL, -- 2026 registration_no_cnsc text, -- "10549, 20389, 20395" — comma-separated when multiple registration_date date, -- 17.02.2026 → 2026-02-17 -- Contestator (the bidder who filed the complaint) contestator_name_raw text, -- "RAC CONSTRUCT MORENI SRL , RAC CONSTRUCT MORENI SRL, URBIO DOWNSTREAM SRL" contestator_names text[], -- split + trimmed contestator_cui_raw text, -- "RO18035010" or "RO18035010;4663448" or empty contestator_cuis text[], -- normalized digits only, duplicates collapsed -- Autoritatea contractantă (the public buyer being contested) authority_name text, authority_cui_raw text, -- "4495140;" or "16054368" or empty authority_cuis text[], -- normalized digits only, duplicates collapsed -- PDF reference (download URL is built from docuid_b64 + filename_b64) pdf_filename text, -- "Decizie_1445.pdf" pdf_docuid_b64 text, -- "Mzg4NThkZGQtY2JkMS00ZDg3LTlhY2UtY2ZlMTBlYzAwM2Y0" pdf_url text, -- materialized: full http://portal.cnsc.ro/sivadoc/download.aspx?... -- Stage-2 (PDF parse) fields — NULL until pdftotext+regex pass runs -- Mostly populated post-hoc; kept here so the schema stays single-table. seap_procedure_ref text, -- "CN1234567" / "ADV……" / "SCN……" — joinable to seap.announcements.ref_number decision_type text, -- 'admis' | 'admis_in_parte' | 'respins' | 'redirectionat' | 'arhivat' | 'fond' | NULL decision_date date, -- date the decision was issued (different from registration_date) decision_summary text, -- short extracted summary pdf_text_sha1 text, -- sha1 of pdftotext output → idempotent re-parse pdf_parsed_at timestamptz, -- Provenance source_page integer, -- which listing page we found this on (debug) fetched_at timestamptz NOT NULL DEFAULT now(), CONSTRAINT cnsc_decizii_pk_natural UNIQUE (decision_no, decision_year) ); CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_authority_cuis ON cnsc.decizii USING gin (authority_cuis); CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_contestator_cuis ON cnsc.decizii USING gin (contestator_cuis); CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_seap_ref ON cnsc.decizii (seap_procedure_ref) WHERE seap_procedure_ref IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_decision_type ON cnsc.decizii (decision_type) WHERE decision_type IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_year ON cnsc.decizii (decision_year DESC); CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_reg_date ON cnsc.decizii (registration_date DESC NULLS LAST); COMMENT ON TABLE cnsc.decizii IS 'Decizii CNSC — contestații pe proceduri SEAP. PK natural (decision_no, decision_year). ' 'Stage 1: scrape listing din portal.cnsc.ro/decizii.html (50/page × 617 pages ≈ 30K rows). ' 'Stage 2 (TODO): pdftotext pe PDF-ul referit → seap_procedure_ref + decision_type.'; COMMENT ON COLUMN cnsc.decizii.decision_no IS 'Numărul deciziei CNSC. Resetat anual — întotdeauna unic doar împreună cu decision_year.'; COMMENT ON COLUMN cnsc.decizii.registration_no_cnsc IS 'Numărul/numerele de înregistrare a contestației la CNSC. Poate fi listă comma-separată ' 'când o decizie soluționează mai multe contestații (ex. "10549, 20389, 20395").'; COMMENT ON COLUMN cnsc.decizii.contestator_cuis IS 'CUIs cifre-only ale contestatorilor (mai mulți când o asociere atacă). ' 'Joinabil cu firms.entities.cui sau seap.announcements.supplier_cui.'; COMMENT ON COLUMN cnsc.decizii.authority_cuis IS 'CUIs cifre-only ale autorităților contractante. Joinabil cu seap.announcements.authority_cui.'; COMMENT ON COLUMN cnsc.decizii.pdf_url IS 'URL complet sivadoc/download.aspx?docUID=…&filename=…&action=inline (b64 in query).'; COMMENT ON COLUMN cnsc.decizii.seap_procedure_ref IS 'Referința procedurii SEAP extrasă din PDF (CN/SCN/ADV/RFQ + cifre). NULL până la Stage-2 PDF-parse.'; COMMENT ON COLUMN cnsc.decizii.decision_type IS 'Tipul deciziei extras din textul PDF: admis | admis_in_parte | respins | redirectionat | arhivat | fond. NULL până la Stage-2.'; -- ── 2. Scrape log (mirrors anre/aaas convention) ────────────────────────── CREATE TABLE IF NOT EXISTS cnsc.scrape_log ( id bigserial PRIMARY KEY, scraper text NOT NULL, -- 'listing' | 'pdf_parse' page_from integer, page_to integer, rows_seen integer NOT NULL DEFAULT 0, rows_inserted integer NOT NULL DEFAULT 0, rows_updated integer NOT NULL DEFAULT 0, rows_skipped integer NOT NULL DEFAULT 0, duration_ms integer NOT NULL DEFAULT 0, started_at timestamptz NOT NULL, finished_at timestamptz NOT NULL DEFAULT now(), error text ); CREATE INDEX IF NOT EXISTS idx_cnsc_scrape_log_started ON cnsc.scrape_log(started_at DESC); -- ── 3. Materialized view: per-CUI rollup (authority side) ───────────────── -- Used by the killer query "authorities most likely to lose at CNSC". -- decision_type rollup is meaningful only after Stage-2 PDF parse is done; -- until then admis_count / respins_count are 0 and contestation_count is -- the useful field. -- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui; CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_authority_cui AS SELECT authority_cui AS cui, COUNT(*) AS contestation_count, COUNT(*) FILTER (WHERE decision_type = 'admis') AS admis_count, COUNT(*) FILTER (WHERE decision_type = 'admis_in_parte') AS admis_in_parte_count, COUNT(*) FILTER (WHERE decision_type = 'respins') AS respins_count, COUNT(*) FILTER (WHERE decision_type IS NOT NULL) AS resolved_count, MIN(registration_date) AS first_contestation_date, MAX(registration_date) AS last_contestation_date FROM cnsc.decizii d, unnest(authority_cuis) AS authority_cui WHERE authority_cui IS NOT NULL AND authority_cui <> '' GROUP BY authority_cui; CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_authority_cui ON cnsc.mv_per_authority_cui(cui); COMMENT ON MATERIALIZED VIEW cnsc.mv_per_authority_cui IS 'Rollup CNSC per autoritate contractantă (CUI). Refresh: ' 'REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui.'; -- ── 4. Materialized view: per-CUI rollup (contestator side) ─────────────── CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_contestator_cui AS SELECT contestator_cui AS cui, COUNT(*) AS contestations_filed, COUNT(*) FILTER (WHERE decision_type = 'admis') AS won_admis, COUNT(*) FILTER (WHERE decision_type = 'admis_in_parte') AS won_partial, COUNT(*) FILTER (WHERE decision_type = 'respins') AS lost_respins, COUNT(*) FILTER (WHERE decision_type IS NOT NULL) AS resolved_count, MIN(registration_date) AS first_contestation_date, MAX(registration_date) AS last_contestation_date FROM cnsc.decizii d, unnest(contestator_cuis) AS contestator_cui WHERE contestator_cui IS NOT NULL AND contestator_cui <> '' GROUP BY contestator_cui; CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_contestator_cui ON cnsc.mv_per_contestator_cui(cui); COMMENT ON MATERIALIZED VIEW cnsc.mv_per_contestator_cui IS 'Rollup CNSC per contestator (CUI). Cine atacă cel mai mult, cu ce rată de succes. ' 'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_contestator_cui.';