initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
@@ -0,0 +1,189 @@
|
||||
-- SEAP Data Schema for Harta Banilor Publici
|
||||
-- Runs inside architools_db, isolated in schema "seap"
|
||||
-- ZERO modifications to existing public.* tables
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Enable extensions needed for fuzzy matching
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||
CREATE EXTENSION IF NOT EXISTS unaccent;
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS seap;
|
||||
|
||||
-- ── Entități SEAP (autorități contractante + furnizori) ──
|
||||
|
||||
CREATE TABLE seap.entities (
|
||||
entity_id INTEGER PRIMARY KEY,
|
||||
entity_type TEXT NOT NULL CHECK (entity_type IN ('authority', 'supplier')),
|
||||
fiscal_number TEXT,
|
||||
name TEXT NOT NULL,
|
||||
city TEXT,
|
||||
county TEXT,
|
||||
address TEXT,
|
||||
postal_code TEXT,
|
||||
is_utility BOOLEAN,
|
||||
siruta TEXT REFERENCES public."GisUat"(siruta),
|
||||
match_score REAL,
|
||||
fetched_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX idx_entities_fiscal ON seap.entities(fiscal_number);
|
||||
CREATE INDEX idx_entities_siruta ON seap.entities(siruta);
|
||||
CREATE INDEX idx_entities_type ON seap.entities(entity_type);
|
||||
CREATE INDEX idx_entities_county ON seap.entities(county);
|
||||
|
||||
-- ── Achiziții directe ──
|
||||
|
||||
CREATE TABLE seap.direct_acquisitions (
|
||||
id INTEGER PRIMARY KEY,
|
||||
unique_code TEXT UNIQUE,
|
||||
name TEXT,
|
||||
cpv_code TEXT,
|
||||
cpv_name TEXT,
|
||||
publication_date TIMESTAMPTZ,
|
||||
finalization_date TIMESTAMPTZ,
|
||||
estimated_value NUMERIC(15,2),
|
||||
closing_value NUMERIC(15,2),
|
||||
currency TEXT DEFAULT 'RON',
|
||||
state_id INTEGER,
|
||||
state_text TEXT,
|
||||
contract_type_id INTEGER,
|
||||
contract_type_text TEXT,
|
||||
eu_fund_id INTEGER,
|
||||
eu_fund_text TEXT,
|
||||
authority_id INTEGER REFERENCES seap.entities(entity_id),
|
||||
supplier_id INTEGER REFERENCES seap.entities(entity_id),
|
||||
fetched_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX idx_da_authority ON seap.direct_acquisitions(authority_id);
|
||||
CREATE INDEX idx_da_supplier ON seap.direct_acquisitions(supplier_id);
|
||||
CREATE INDEX idx_da_finalization ON seap.direct_acquisitions(finalization_date);
|
||||
CREATE INDEX idx_da_publication ON seap.direct_acquisitions(publication_date);
|
||||
CREATE INDEX idx_da_cpv ON seap.direct_acquisitions(cpv_code);
|
||||
CREATE INDEX idx_da_value ON seap.direct_acquisitions(closing_value);
|
||||
|
||||
-- ── Licitații publice (contract award notices) ──
|
||||
|
||||
CREATE TABLE seap.public_notices (
|
||||
id INTEGER PRIMARY KEY,
|
||||
notice_no TEXT,
|
||||
contract_title TEXT,
|
||||
cpv_code TEXT,
|
||||
cpv_name TEXT,
|
||||
estimated_value NUMERIC(15,2),
|
||||
contract_value NUMERIC(15,2),
|
||||
currency TEXT DEFAULT 'RON',
|
||||
publication_date TIMESTAMPTZ,
|
||||
state_date TIMESTAMPTZ,
|
||||
procedure_type_id INTEGER,
|
||||
procedure_type_text TEXT,
|
||||
contract_type_id INTEGER,
|
||||
contract_type_text TEXT,
|
||||
notice_type_id INTEGER,
|
||||
state_id INTEGER,
|
||||
state_text TEXT,
|
||||
authority_id INTEGER REFERENCES seap.entities(entity_id),
|
||||
authority_city TEXT,
|
||||
authority_county TEXT,
|
||||
authority_siruta TEXT REFERENCES public."GisUat"(siruta),
|
||||
has_lots BOOLEAN DEFAULT false,
|
||||
fetched_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX idx_pn_authority ON seap.public_notices(authority_id);
|
||||
CREATE INDEX idx_pn_date ON seap.public_notices(publication_date);
|
||||
CREATE INDEX idx_pn_siruta ON seap.public_notices(authority_siruta);
|
||||
CREATE INDEX idx_pn_cpv ON seap.public_notices(cpv_code);
|
||||
|
||||
-- ── Contracte câștigate (din section 5 a licitațiilor) ──
|
||||
|
||||
CREATE TABLE seap.notice_contracts (
|
||||
id SERIAL PRIMARY KEY,
|
||||
notice_id INTEGER REFERENCES seap.public_notices(id),
|
||||
lot_number INTEGER,
|
||||
lot_title TEXT,
|
||||
contract_value NUMERIC(15,2),
|
||||
currency TEXT DEFAULT 'RON',
|
||||
contract_date DATE,
|
||||
winner_id INTEGER REFERENCES seap.entities(entity_id),
|
||||
winner_name TEXT,
|
||||
winner_fiscal TEXT,
|
||||
winner_city TEXT,
|
||||
winner_county TEXT,
|
||||
winner_siruta TEXT REFERENCES public."GisUat"(siruta),
|
||||
num_offers INTEGER
|
||||
);
|
||||
|
||||
CREATE INDEX idx_nc_notice ON seap.notice_contracts(notice_id);
|
||||
CREATE INDEX idx_nc_winner ON seap.notice_contracts(winner_id);
|
||||
CREATE INDEX idx_nc_winner_siruta ON seap.notice_contracts(winner_siruta);
|
||||
|
||||
-- ── Matching localități SEAP → SIRUTA ──
|
||||
|
||||
CREATE TABLE seap.locality_map (
|
||||
seap_city TEXT NOT NULL,
|
||||
seap_county TEXT NOT NULL,
|
||||
siruta TEXT REFERENCES public."GisUat"(siruta),
|
||||
match_type TEXT,
|
||||
confidence REAL,
|
||||
PRIMARY KEY (seap_city, seap_county)
|
||||
);
|
||||
|
||||
-- ── Stare sync scraper ──
|
||||
|
||||
CREATE TABLE seap.sync_state (
|
||||
source TEXT PRIMARY KEY,
|
||||
last_date TIMESTAMPTZ,
|
||||
last_id INTEGER,
|
||||
status TEXT,
|
||||
updated_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
|
||||
INSERT INTO seap.sync_state (source, status) VALUES
|
||||
('da', 'pending'),
|
||||
('notices', 'pending');
|
||||
|
||||
-- ── Helper: normalize locality names ──
|
||||
|
||||
-- Canonical form of a locality name for matching:
--   1. collapse every run of whitespace into a single space,
--   2. strip diacritics (unaccent extension),
--   3. remove leading/trailing spaces,
--   4. lower-case the result.
-- Returns NULL for NULL input (every step propagates NULL).
CREATE OR REPLACE FUNCTION seap.normalize_locality(input TEXT)
RETURNS TEXT
LANGUAGE sql
IMMUTABLE
AS $$
    SELECT lower(
        btrim(
            unaccent(regexp_replace(input, '\s+', ' ', 'g'))
        )
    );
$$;
|
||||
|
||||
-- ── Materialized view: procurement stats per UAT ──
|
||||
|
||||
CREATE MATERIALIZED VIEW seap.uat_procurement_stats AS
|
||||
SELECT
|
||||
u.siruta,
|
||||
u.name AS uat_name,
|
||||
u.county,
|
||||
COALESCE(da_stats.da_count, 0) AS da_count,
|
||||
COALESCE(da_stats.da_total_value, 0) AS da_total_value,
|
||||
COALESCE(pn_stats.notice_count, 0) AS notice_count,
|
||||
COALESCE(pn_stats.notice_total_value, 0) AS notice_total_value,
|
||||
COALESCE(da_stats.da_count, 0) + COALESCE(pn_stats.notice_count, 0) AS total_contracts,
|
||||
COALESCE(da_stats.da_total_value, 0) + COALESCE(pn_stats.notice_total_value, 0) AS total_value
|
||||
FROM public."GisUat" u
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
COUNT(*) AS da_count,
|
||||
SUM(da.closing_value) AS da_total_value
|
||||
FROM seap.direct_acquisitions da
|
||||
JOIN seap.entities e ON e.entity_id = da.authority_id
|
||||
WHERE e.siruta = u.siruta
|
||||
) da_stats ON true
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
COUNT(*) AS notice_count,
|
||||
SUM(pn.contract_value) AS notice_total_value
|
||||
FROM seap.public_notices pn
|
||||
WHERE pn.authority_siruta = u.siruta
|
||||
) pn_stats ON true;
|
||||
|
||||
CREATE UNIQUE INDEX idx_ups_siruta ON seap.uat_procurement_stats(siruta);
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,52 @@
|
||||
-- Unified announcements table for all SEAP data types
|
||||
BEGIN;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS seap.announcements (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
type TEXT NOT NULL,
|
||||
ref_number TEXT NOT NULL,
|
||||
authority_name TEXT,
|
||||
authority_cui TEXT,
|
||||
authority_siruta TEXT,
|
||||
title TEXT,
|
||||
cpv_code TEXT,
|
||||
cpv_name TEXT,
|
||||
contract_type TEXT,
|
||||
publication_date TIMESTAMPTZ,
|
||||
finalization_date TIMESTAMPTZ,
|
||||
contract_date DATE,
|
||||
estimated_value NUMERIC(15,2),
|
||||
awarded_value NUMERIC(15,2),
|
||||
currency TEXT DEFAULT 'RON',
|
||||
supplier_name TEXT,
|
||||
supplier_cui TEXT,
|
||||
supplier_siruta TEXT,
|
||||
procedure_type TEXT,
|
||||
procedure_state TEXT,
|
||||
award_type TEXT,
|
||||
legislation TEXT,
|
||||
criterion TEXT,
|
||||
eu_funded TEXT,
|
||||
eu_program TEXT,
|
||||
lot_number INTEGER,
|
||||
has_lots TEXT,
|
||||
joue TEXT,
|
||||
value_before NUMERIC(15,2),
|
||||
value_after NUMERIC(15,2),
|
||||
modification_desc TEXT,
|
||||
seap_url TEXT,
|
||||
source TEXT DEFAULT 'datagov',
|
||||
imported_at TIMESTAMPTZ DEFAULT now(),
|
||||
UNIQUE(type, ref_number)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_type ON seap.announcements(type);
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_auth_cui ON seap.announcements(authority_cui);
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_auth_siruta ON seap.announcements(authority_siruta);
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_sup_cui ON seap.announcements(supplier_cui);
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_pub_date ON seap.announcements(publication_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_value ON seap.announcements(awarded_value);
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_cpv ON seap.announcements(cpv_code);
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_contract_type ON seap.announcements(contract_type);
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,98 @@
|
||||
-- Platform tables for submissions + voting
|
||||
BEGIN;
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS platform;
|
||||
|
||||
-- Ideas/submissions — anyone can propose
|
||||
CREATE TABLE platform.ideas (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
title TEXT NOT NULL,
|
||||
problem TEXT NOT NULL, -- "Ce te deranjează?"
|
||||
solution TEXT, -- "Cum ar trebui să fie?"
|
||||
category TEXT DEFAULT 'general', -- transparenta, cereri, ai, educatie, sanatate, etc
|
||||
author_name TEXT, -- optional
|
||||
author_email TEXT, -- optional, for follow-up
|
||||
author_city TEXT, -- optional
|
||||
status TEXT DEFAULT 'nou', -- nou, în discuție, în lucru, mvp, live, respins
|
||||
votes INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT now(),
|
||||
updated_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX idx_ideas_votes ON platform.ideas(votes DESC);
|
||||
CREATE INDEX idx_ideas_status ON platform.ideas(status);
|
||||
CREATE INDEX idx_ideas_created ON platform.ideas(created_at DESC);
|
||||
CREATE INDEX idx_ideas_category ON platform.ideas(category);
|
||||
|
||||
-- Votes — fingerprint-based (no accounts)
|
||||
-- One row per (idea, voter fingerprint); the UNIQUE constraint is what blocks
-- double voting. idea_id is NOT NULL because a NULL idea_id would bypass that
-- constraint (NULLs never compare equal) and leave votes attached to nothing.
CREATE TABLE platform.votes (
    id BIGSERIAL PRIMARY KEY,
    idea_id BIGINT NOT NULL REFERENCES platform.ideas(id) ON DELETE CASCADE,
    fingerprint TEXT NOT NULL, -- hash of IP + user-agent
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    UNIQUE(idea_id, fingerprint)
);
|
||||
|
||||
-- Comments on ideas — simple, no accounts
|
||||
-- Free-form comments attached to an idea; removed together with the idea
-- (ON DELETE CASCADE). idea_id is NOT NULL so a comment can never exist
-- without its parent idea.
CREATE TABLE platform.comments (
    id BIGSERIAL PRIMARY KEY,
    idea_id BIGINT NOT NULL REFERENCES platform.ideas(id) ON DELETE CASCADE,
    author_name TEXT DEFAULT 'Anonim',
    content TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Supports listing the comments of one idea in chronological order.
CREATE INDEX idx_comments_idea ON platform.comments(idea_id, created_at);
|
||||
|
||||
-- Seed some initial ideas to get things started
|
||||
-- Seed rows shown before any real submissions exist. The vote counts are
-- inserted directly into platform.ideas.votes; no matching platform.votes
-- rows are created here.
INSERT INTO platform.ideas (title, problem, solution, category, author_name, status, votes) VALUES
(
    'Verificare status dosar la orice instituție',
    'Trebuie să mergi fizic sau să suni repetat ca să afli ce se întâmplă cu dosarul tău. Fiecare instituție are alt sistem, unele nu au deloc.',
    'O platformă unificată unde introduci numărul de dosar și vezi statusul instant, indiferent de instituție.',
    'cereri', 'Comunitate', 'nou', 42
),
(
    'Extras Carte Funciară online, instant',
    'Durează 3-5 zile și necesită deplasare la OCPI. În 2026, un document public ar trebui disponibil online.',
    'Introduci număr cadastral → primești PDF cu extrasul CF. Fără deplasare, fără așteptare.',
    'cereri', 'Comunitate', 'nou', 38
),
(
    'Certificat fiscal în 30 de secunde',
    'Stai la coadă la primărie, plătești timbru, aștepți 1-3 zile. De 3 ori pe an minim, dacă ai firmă.',
    'CNP sau CUI → certificat fiscal digital, semnat electronic, valid legal.',
    'cereri', 'Comunitate', 'nou', 35
),
(
    'Programare buletin/pașaport care chiar funcționează',
    'Sistemul MAI e permanent supraîncărcat, cade, nu găsești slot-uri. Ajungi la 4 dimineața la coadă.',
    'Calendar cu disponibilitate reală, notificare când se eliberează slot, programare în 3 click-uri.',
    'cereri', 'Comunitate', 'nou', 50
),
(
    'Calculator taxe și impozite locale',
    'Nu știi cât datorezi, trebuie să mergi la primărie să afli. Fiecare primărie calculează diferit.',
    'Introdu adresa sau nr. cadastral → vezi toate taxele datorate, cu deadline-uri și posibilitate de plată.',
    'transparenta', 'Comunitate', 'nou', 30
),
(
    'Monitor licitații publice cu alerte',
    'Informația e dispersată, greu de urmărit. Firmele mici pierd oportunități pentru că nu știu de ele.',
    'Feed cu licitații filtrat pe domeniu/județ/valoare. Alerte pe email când apare ceva relevant.',
    'transparenta', 'Comunitate', 'în lucru', 25
),
(
    'Profil digital per primărie',
    'Nu există un loc centralizat unde să vezi cum performează primăria ta: buget, licitații, servicii digitale.',
    'Pagina per primărie cu: buget, top cheltuieli, licitații, nivel digitalizare, comparație cu altele.',
    'transparenta', 'Comunitate', 'în lucru', 22
),
(
    'Generator cereri și petiții cu AI',
    'Oamenii nu știu cum să formuleze o cerere oficială. Limbajul birocratic intimidează.',
    'Descrii în cuvintele tale ce vrei → AI generează cererea completă, cu referințe legale corecte.',
    'ai', 'Comunitate', 'nou', 28
);
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,174 @@
|
||||
-- WSP integration: schema extensions for SEAP web service ingestion.
|
||||
-- Idempotent: safe to re-run on existing DB (already has ~600K rows in seap.announcements).
|
||||
BEGIN;
|
||||
|
||||
-- ── Extend seap.announcements for WSP-specific structured + raw data ──
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS county_code TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS notice_state TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS notice_state_id INT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS deadline_submission TIMESTAMPTZ;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS opening_date TIMESTAMPTZ;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS duration_months INT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS duration_days INT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS authority_address TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS authority_email TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS authority_phone TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS authority_url TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS authority_type TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS authority_main_activity TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS supplier_address TEXT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS supplier_is_sme BOOLEAN;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS framework_agreement BOOLEAN;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS lots_count INT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS contract_has_lots BOOLEAN;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS award_criteria JSONB;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS lots JSONB;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS documents JSONB;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS details JSONB; -- raw Section1-6 nested
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS notice_id_internal BIGINT; -- WSP CNoticeId / CaNoticeId
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS authority_entity_id INT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS supplier_entity_id INT;
|
||||
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS enriched_at TIMESTAMPTZ;
|
||||
|
||||
-- pg_trgm provides the gin_trgm_ops operator class used by the two name
-- indexes below, so it has to be installed BEFORE they are created; otherwise
-- this script fails on a database where the extension is not yet present.
-- Idempotent.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Plain filters used by the hub UI.
CREATE INDEX IF NOT EXISTS idx_ann_county ON seap.announcements(county_code);
CREATE INDEX IF NOT EXISTS idx_ann_state ON seap.announcements(notice_state);

-- Partial index: only rows that actually carry a submission deadline.
CREATE INDEX IF NOT EXISTS idx_ann_deadline ON seap.announcements(deadline_submission)
    WHERE deadline_submission IS NOT NULL;

-- Trigram indexes for fuzzy authority/supplier name search.
CREATE INDEX IF NOT EXISTS idx_ann_authority_name_trgm ON seap.announcements USING gin(authority_name gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_ann_supplier_name_trgm ON seap.announcements USING gin(supplier_name gin_trgm_ops);
|
||||
|
||||
|
||||
-- ── Sync state: cursor per WSP feed ──
|
||||
CREATE TABLE IF NOT EXISTS seap.wsp_sync_state (
|
||||
feed TEXT PRIMARY KEY, -- e.g. 'ca_notices', 'c_notices', 'su_contracts'
|
||||
last_run_at TIMESTAMPTZ,
|
||||
last_cursor_date TIMESTAMPTZ, -- highest publication_date successfully ingested
|
||||
last_window_start TIMESTAMPTZ,
|
||||
last_window_end TIMESTAMPTZ,
|
||||
items_imported_total BIGINT DEFAULT 0,
|
||||
items_imported_24h INT DEFAULT 0,
|
||||
consecutive_errors INT DEFAULT 0,
|
||||
last_error TEXT,
|
||||
last_error_at TIMESTAMPTZ,
|
||||
notes TEXT
|
||||
);
|
||||
|
||||
-- ── Backfill window queue: each window is a checkpoint ──
|
||||
-- One row per (feed, time window[, county]) unit of backfill work; the row is
-- the checkpoint for that window.
CREATE TABLE IF NOT EXISTS seap.wsp_backfill_windows (
    id BIGSERIAL PRIMARY KEY,
    feed TEXT NOT NULL,
    window_start TIMESTAMPTZ NOT NULL,
    window_end TIMESTAMPTZ NOT NULL,
    county_code TEXT, -- optional partition
    state TEXT NOT NULL DEFAULT 'pending', -- pending, in_progress, completed, failed, skipped
    items_imported INT DEFAULT 0,
    page_total INT,
    attempts INT DEFAULT 0,
    last_error TEXT,
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT now(),
    -- Only deduplicates rows whose county_code is NOT NULL: NULLs are treated
    -- as distinct by a UNIQUE constraint. The NULL case is covered by the
    -- partial unique index below.
    UNIQUE(feed, window_start, window_end, county_code)
);

CREATE INDEX IF NOT EXISTS idx_wsp_bf_state ON seap.wsp_backfill_windows(feed, state, window_start);
CREATE INDEX IF NOT EXISTS idx_wsp_bf_pending ON seap.wsp_backfill_windows(feed, window_start) WHERE state = 'pending';

-- Enforce one row per (feed, window) when no county partition is used.
-- NOTE(review): an insert that previously created a silent duplicate for an
-- unpartitioned window now raises a unique violation; writers that rely on
-- ON CONFLICT should use the target-less form (ON CONFLICT DO NOTHING) so
-- both uniqueness rules are honoured — confirm against the ingestion code.
CREATE UNIQUE INDEX IF NOT EXISTS uq_wsp_bf_window_no_county
    ON seap.wsp_backfill_windows(feed, window_start, window_end)
    WHERE county_code IS NULL;
|
||||
|
||||
|
||||
-- ── Beletage-scoped tables (Su* operations) ──
|
||||
CREATE TABLE IF NOT EXISTS seap.beletage_contracts (
|
||||
contract_id BIGINT PRIMARY KEY, -- WSP ContractId
|
||||
contract_no TEXT,
|
||||
contract_title TEXT,
|
||||
contract_type TEXT,
|
||||
contract_phase TEXT,
|
||||
contract_state TEXT,
|
||||
awarding_date DATE,
|
||||
contract_date DATE,
|
||||
publication_date TIMESTAMPTZ,
|
||||
duration_months INT,
|
||||
contract_value NUMERIC(15,2),
|
||||
default_currency_value NUMERIC(15,2),
|
||||
currency TEXT,
|
||||
ca_notice_id BIGINT, -- link to public CA notice
|
||||
ca_notice_no TEXT,
|
||||
authority_name TEXT,
|
||||
authority_cui TEXT,
|
||||
is_current_version BOOLEAN,
|
||||
is_rejected BOOLEAN,
|
||||
version_no INT,
|
||||
version_date TIMESTAMPTZ,
|
||||
justification TEXT,
|
||||
additional_information TEXT,
|
||||
details JSONB, -- raw CANotice + ContractPhases + ContractSections
|
||||
imported_at TIMESTAMPTZ DEFAULT now(),
|
||||
enriched_at TIMESTAMPTZ
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_beletage_contracts_date ON seap.beletage_contracts(awarding_date DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_beletage_contracts_authority ON seap.beletage_contracts(authority_cui);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS seap.beletage_invoices (
|
||||
invoice_id BIGINT PRIMARY KEY, -- WSP InvoiceId
|
||||
invoice_no TEXT,
|
||||
invoice_date DATE,
|
||||
due_date DATE,
|
||||
contract_id BIGINT, -- FK soft to beletage_contracts
|
||||
contract_no TEXT,
|
||||
authority_name TEXT,
|
||||
authority_cui TEXT,
|
||||
total_value NUMERIC(15,2),
|
||||
total_value_no_vat NUMERIC(15,2),
|
||||
vat_value NUMERIC(15,2),
|
||||
currency TEXT,
|
||||
state TEXT,
|
||||
paid_value NUMERIC(15,2),
|
||||
paid_at TIMESTAMPTZ,
|
||||
details JSONB, -- raw InvoiceItem + payments + details
|
||||
imported_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_beletage_invoices_date ON seap.beletage_invoices(invoice_date DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_beletage_invoices_contract ON seap.beletage_invoices(contract_id);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS seap.beletage_direct_acquisitions (
|
||||
da_id BIGINT PRIMARY KEY, -- WSP DirectAcquisitionId
|
||||
da_name TEXT,
|
||||
unique_identification_code TEXT,
|
||||
cpv_code TEXT,
|
||||
cpv_name TEXT,
|
||||
contract_type TEXT,
|
||||
publication_date TIMESTAMPTZ,
|
||||
finalization_date TIMESTAMPTZ,
|
||||
estimated_value NUMERIC(15,2),
|
||||
closing_value NUMERIC(15,2),
|
||||
currency TEXT,
|
||||
da_state TEXT,
|
||||
authority_id INT,
|
||||
authority_name TEXT,
|
||||
authority_cui TEXT,
|
||||
details JSONB,
|
||||
imported_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_beletage_da_date ON seap.beletage_direct_acquisitions(finalization_date DESC);
|
||||
|
||||
-- ── Beletage catalog (if used) ──
|
||||
CREATE TABLE IF NOT EXISTS seap.beletage_catalog (
|
||||
item_code TEXT PRIMARY KEY,
|
||||
item_name TEXT,
|
||||
cpv_code TEXT,
|
||||
unit_price NUMERIC(15,2),
|
||||
currency TEXT,
|
||||
last_updated TIMESTAMPTZ,
|
||||
details JSONB,
|
||||
imported_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
|
||||
|
||||
-- ── Materialized views for hub UI (refresh nightly) ──
|
||||
-- Will be added in 005 once bulk data is in; placeholder comment here for traceability.
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,121 @@
|
||||
-- Materialized views for hub UI — refreshed nightly after WSP sync.
|
||||
-- Provides fast aggregations for "Achiziții România live" dashboards.
|
||||
BEGIN;
|
||||
|
||||
-- ── Daily totals: count + value per day (across all WSP sources) ──
|
||||
-- Per-day, per-type notice counts and value totals for WSP-sourced rows
-- published in the last 24 months (window evaluated at refresh time).
-- sum() already skips NULL inputs, so no FILTER is needed on the value sums.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_daily_totals AS
SELECT
    date_trunc('day', publication_date)::date AS day,
    type,
    count(*) AS notices,
    sum(awarded_value) AS total_awarded,
    sum(estimated_value) AS total_estimated,
    count(DISTINCT authority_cui) AS distinct_authorities,
    count(DISTINCT supplier_cui) AS distinct_suppliers
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND publication_date >= now() - interval '24 months'
GROUP BY
    date_trunc('day', publication_date)::date,
    type;

CREATE INDEX IF NOT EXISTS idx_mv_daily_totals_day ON seap.mv_daily_totals(day DESC);

-- REFRESH MATERIALIZED VIEW CONCURRENTLY (used by seap.refresh_wsp_views)
-- requires a unique index; (day, type) is exactly the grouping key.
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_daily_totals_day_type ON seap.mv_daily_totals(day, type);
|
||||
|
||||
|
||||
-- ── Top contracting authorities (last 12 months by total awarded value) ──
|
||||
-- Per-authority aggregates over WSP-sourced rows of the last 12 months
-- (window evaluated at refresh time). One row per
-- (authority_cui, authority_name, county_code) combination.
-- The former "HAVING count(*) >= 1" was dropped: every group has at least one
-- row, so it never filtered anything. sum()/avg() already skip NULL inputs.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_top_authorities AS
SELECT
    authority_cui,
    authority_name,
    county_code,
    count(*) AS notices_count,
    count(*) FILTER (WHERE type = 'ca_notice') AS awarded_count,
    sum(awarded_value) AS total_awarded,
    avg(awarded_value) AS avg_awarded,
    array_agg(DISTINCT cpv_code) FILTER (WHERE cpv_code IS NOT NULL) AS cpv_codes,
    max(publication_date) AS most_recent
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND authority_cui IS NOT NULL
  AND publication_date >= now() - interval '12 months'
GROUP BY
    authority_cui,
    authority_name,
    county_code;

CREATE INDEX IF NOT EXISTS idx_mv_top_auth_value ON seap.mv_top_authorities(total_awarded DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_top_auth_cui ON seap.mv_top_authorities(authority_cui);
CREATE INDEX IF NOT EXISTS idx_mv_top_auth_county ON seap.mv_top_authorities(county_code);

-- REFRESH MATERIALIZED VIEW CONCURRENTLY (used by seap.refresh_wsp_views)
-- requires a unique index; the grouping key is unique by construction.
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_top_auth_key
    ON seap.mv_top_authorities(authority_cui, authority_name, county_code);
|
||||
|
||||
|
||||
-- ── Top suppliers (firms that won contracts) ──
|
||||
-- Per-supplier aggregates over WSP-sourced 'ca_notice' rows of the last
-- 12 months (window evaluated at refresh time). One row per
-- (supplier_cui, supplier_name) combination.
-- sum()/avg() already skip NULL inputs, so no FILTER is needed on them.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_top_suppliers AS
SELECT
    supplier_cui,
    supplier_name,
    count(*) AS contracts_won,
    sum(awarded_value) AS total_awarded,
    avg(awarded_value) AS avg_awarded,
    count(DISTINCT authority_cui) AS distinct_clients,
    array_agg(DISTINCT cpv_code) FILTER (WHERE cpv_code IS NOT NULL) AS cpv_codes,
    max(publication_date) AS most_recent
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND supplier_cui IS NOT NULL
  AND type = 'ca_notice'
  AND publication_date >= now() - interval '12 months'
GROUP BY
    supplier_cui,
    supplier_name;

CREATE INDEX IF NOT EXISTS idx_mv_top_supp_value ON seap.mv_top_suppliers(total_awarded DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_top_supp_cui ON seap.mv_top_suppliers(supplier_cui);

-- REFRESH MATERIALIZED VIEW CONCURRENTLY (used by seap.refresh_wsp_views)
-- requires a unique index; the grouping key is unique by construction.
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_top_supp_key
    ON seap.mv_top_suppliers(supplier_cui, supplier_name);
|
||||
|
||||
|
||||
-- ── Top CPV codes (most-used categories) ──
|
||||
-- Per-CPV-code aggregates over WSP-sourced rows of the last 12 months
-- (window evaluated at refresh time). One row per cpv_code.
-- sum() already skips NULL inputs, so no FILTER is needed on it.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_top_cpv AS
SELECT
    cpv_code,
    count(*) AS notices_count,
    sum(awarded_value) AS total_awarded,
    count(DISTINCT authority_cui) AS distinct_buyers,
    count(DISTINCT supplier_cui) AS distinct_winners
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND cpv_code IS NOT NULL
  AND publication_date >= now() - interval '12 months'
GROUP BY cpv_code;

CREATE INDEX IF NOT EXISTS idx_mv_top_cpv_value ON seap.mv_top_cpv(total_awarded DESC NULLS LAST);

-- REFRESH MATERIALIZED VIEW CONCURRENTLY (used by seap.refresh_wsp_views)
-- requires a unique index; cpv_code is the grouping key and never NULL here.
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_top_cpv_code ON seap.mv_top_cpv(cpv_code);
|
||||
|
||||
|
||||
-- ── County totals (for map) ──
|
||||
-- Per-county, per-type notice counts and awarded totals over WSP-sourced rows
-- of the last 12 months (window evaluated at refresh time).
-- sum() already skips NULL inputs, so no FILTER is needed on it.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_county_totals AS
SELECT
    county_code,
    type,
    count(*) AS notices_count,
    sum(awarded_value) AS total_awarded
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND county_code IS NOT NULL
  AND publication_date >= now() - interval '12 months'
GROUP BY
    county_code,
    type;

CREATE INDEX IF NOT EXISTS idx_mv_county_totals_code ON seap.mv_county_totals(county_code);

-- REFRESH MATERIALIZED VIEW CONCURRENTLY (used by seap.refresh_wsp_views)
-- requires a unique index; (county_code, type) is exactly the grouping key.
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_county_totals_key ON seap.mv_county_totals(county_code, type);
|
||||
|
||||
|
||||
-- ── Refresh function (called by cron after daily sync) ──
|
||||
-- Refreshes every WSP hub materialized view, in a fixed order.
--
-- Each view is first refreshed CONCURRENTLY (readers are not blocked). When
-- that is not possible for a given view, only that view falls back to a plain
-- blocking refresh. Two conditions trigger the fallback:
--   * object_not_in_prerequisite_state — the view has no usable unique index;
--   * feature_not_supported            — the view has never been populated.
-- The previous version caught only feature_not_supported, so a view without a
-- unique index made the whole function fail instead of falling back.
-- Any other error is propagated to the caller.
CREATE OR REPLACE FUNCTION seap.refresh_wsp_views()
RETURNS void AS $$
DECLARE
    view_name TEXT;
BEGIN
    FOREACH view_name IN ARRAY ARRAY[
        'mv_daily_totals',
        'mv_top_authorities',
        'mv_top_suppliers',
        'mv_top_cpv',
        'mv_county_totals'
    ]
    LOOP
        -- Inner block = per-view savepoint: a failed concurrent attempt is
        -- rolled back on its own and does not undo views already refreshed.
        BEGIN
            EXECUTE format('REFRESH MATERIALIZED VIEW CONCURRENTLY seap.%I', view_name);
        EXCEPTION
            WHEN feature_not_supported OR object_not_in_prerequisite_state THEN
                EXECUTE format('REFRESH MATERIALIZED VIEW seap.%I', view_name);
        END;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,71 @@
|
||||
-- Map WSP rows to UAT SIRUTA codes + extend the harta UAT stats view.
|
||||
-- (Suppliers may have "RO " prefix; authorities are clean. Strip both forms.)
|
||||
BEGIN;
|
||||
|
||||
-- Indexes to make the UPDATE fast
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_auth_cui_wsp ON seap.announcements(authority_cui)
|
||||
WHERE source LIKE 'wsp_%' AND authority_siruta IS NULL AND authority_cui IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_ann_supp_cui_wsp ON seap.announcements(supplier_cui)
|
||||
WHERE source LIKE 'wsp_%' AND supplier_siruta IS NULL AND supplier_cui IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_cui_loc_cui ON seap.cui_location(cui) WHERE siruta IS NOT NULL;
|
||||
|
||||
-- Authority — clean numeric CUI (direct match)
|
||||
UPDATE seap.announcements a
|
||||
SET authority_siruta = cl.siruta
|
||||
FROM seap.cui_location cl
|
||||
WHERE a.source LIKE 'wsp_%'
|
||||
AND a.authority_siruta IS NULL
|
||||
AND a.authority_cui IS NOT NULL
|
||||
AND cl.siruta IS NOT NULL
|
||||
AND cl.cui = a.authority_cui;
|
||||
|
||||
-- Suppliers — may have "RO " prefix, strip and retry the rest
|
||||
-- Fill supplier_siruta for WSP rows that do not have one yet, matching the
-- supplier CUI against seap.cui_location.
-- The CUI is trimmed FIRST and only then stripped of a case-insensitive "RO"
-- prefix (plus any whitespace after it); trimming afterwards, as before, left
-- values with leading whitespace (e.g. ' RO123') unmatched because the '^RO'
-- anchor never hit.
UPDATE seap.announcements AS a
SET supplier_siruta = cl.siruta
FROM seap.cui_location AS cl
WHERE a.source LIKE 'wsp_%'
  AND a.supplier_siruta IS NULL
  AND a.supplier_cui IS NOT NULL
  AND cl.siruta IS NOT NULL
  AND cl.cui = regexp_replace(btrim(a.supplier_cui), '^RO\s*', '', 'i');
|
||||
|
||||
-- Extend uat_procurement_stats view to include WSP types
|
||||
-- Rebuild the per-UAT stats view on top of seap.announcements.
-- CASCADE also drops any object that depends on the old view.
DROP MATERIALIZED VIEW IF EXISTS seap.uat_procurement_stats CASCADE;

-- One row per UAT in public."GisUat"; UATs without any announcement get zeros.
--   da_*      : rows of type 'da'
--   notice_*  : rows of the contract-like types listed below
--   total_*   : every row with an authority_siruta; total_value uses
--               awarded_value, falling back to estimated_value, then 0.
-- "GisUat" is schema-qualified so the view does not depend on the session
-- search_path.
CREATE MATERIALIZED VIEW seap.uat_procurement_stats AS
SELECT
    u.siruta,
    u.name AS uat_name,
    u.county,
    COALESCE(s.da_count, 0::bigint) AS da_count,
    COALESCE(s.da_value, 0::numeric) AS da_total_value,
    COALESCE(s.contract_count, 0::bigint) AS notice_count,
    COALESCE(s.contract_value, 0::numeric) AS notice_total_value,
    COALESCE(s.total_count, 0::bigint) AS total_contracts,
    COALESCE(s.total_value, 0::numeric) AS total_value
FROM public."GisUat" u
LEFT JOIN (
    SELECT
        authority_siruta AS siruta,
        count(*) FILTER (WHERE type = 'da') AS da_count,
        sum(awarded_value) FILTER (WHERE type = 'da') AS da_value,
        count(*) FILTER (WHERE type IN (
            'contract', 'atribuire_fara', 'ted_notice',
            'ca_notice', 'rfq_notice'
        )) AS contract_count,
        sum(awarded_value) FILTER (WHERE type IN (
            'contract', 'atribuire_fara', 'ted_notice',
            'ca_notice', 'rfq_notice'
        )) AS contract_value,
        count(*) AS total_count,
        sum(COALESCE(awarded_value, estimated_value, 0::numeric)) AS total_value
    FROM seap.announcements
    WHERE authority_siruta IS NOT NULL
    GROUP BY authority_siruta
) s ON s.siruta = u.siruta;

CREATE UNIQUE INDEX uq_uat_proc_stats ON seap.uat_procurement_stats(siruta);
CREATE INDEX idx_uat_proc_stats_value ON seap.uat_procurement_stats(total_value DESC NULLS LAST);
CREATE INDEX idx_uat_proc_stats_county ON seap.uat_procurement_stats(county);
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,71 @@
|
||||
-- CPV nomenclature: 9,454 codes with Romanian names + EU emojis.
|
||||
-- Loaded from samhallskod/cpv-eu (data sourced from official EU CPV 2008 XML).
|
||||
BEGIN;
|
||||
|
||||
-- CPV nomenclature table, keyed by the 8-digit code without its check digit.
CREATE TABLE IF NOT EXISTS seap.cpv_codes (
    -- 8-digit code, no check digit, e.g. '45000000'
    code TEXT PRIMARY KEY,
    -- 8-digit code plus check digit, e.g. '45000000-7'
    code_full TEXT,
    name_ro TEXT NOT NULL,
    name_en TEXT,
    -- hierarchy depth: 1 = division (45), 2 = group (450), 3 = class (4500), ...
    level INT NOT NULL,
    -- top-level parent: first 2 digits + 6 zeroes, e.g. '45000000'
    division_code TEXT NOT NULL,
    -- code one level up
    parent_code TEXT,
    -- only set on division-level rows
    emoji TEXT,
    imported_at TIMESTAMPTZ DEFAULT now()
);

-- Lookup paths: by division, by parent, by level, and fuzzy search on the
-- Romanian name (trigram GIN, needs pg_trgm).
CREATE INDEX IF NOT EXISTS idx_cpv_division
    ON seap.cpv_codes (division_code);
CREATE INDEX IF NOT EXISTS idx_cpv_parent
    ON seap.cpv_codes (parent_code);
CREATE INDEX IF NOT EXISTS idx_cpv_level
    ON seap.cpv_codes (level);
CREATE INDEX IF NOT EXISTS idx_cpv_name_trgm
    ON seap.cpv_codes USING gin (name_ro gin_trgm_ops);
|
||||
|
||||
|
||||
-- Helper: normalize a CPV code to its bare form without the check digit.
--   '45123456-7'  → '45123456'  (check-digit suffix "-X" stripped)
--   ' 45123456 '  → '45123456'  (surrounding spaces stripped)
--   ''  or '   '  → NULL
--   NULL          → NULL        (function is STRICT)
-- No further validation is done: any other input is returned trimmed.
CREATE OR REPLACE FUNCTION seap.cpv_normalize(code TEXT)
RETURNS TEXT AS $$
DECLARE
    v_trimmed TEXT := trim(code);
BEGIN
    -- Test for emptiness AFTER trimming, so whitespace-only input yields NULL
    -- rather than an empty string that can never match seap.cpv_codes.code.
    IF v_trimmed = '' THEN
        RETURN NULL;
    END IF;
    -- Strip the check digit suffix (-X); a value that consisted of nothing but
    -- the suffix collapses to NULL as well.
    RETURN NULLIF(regexp_replace(v_trimmed, '-[0-9]$', ''), '');
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
|
||||
|
||||
|
||||
-- Helper: get the division code (first 2 digits + 6 zeros) of any CPV code,
-- e.g. '45123456-7' → '45000000'.
-- Returns NULL for NULL input (STRICT) and for input that has fewer than two
-- characters once normalized by seap.cpv_normalize().
CREATE OR REPLACE FUNCTION seap.cpv_division(code TEXT)
RETURNS TEXT AS $$
DECLARE
    v_code TEXT := seap.cpv_normalize(code);
BEGIN
    -- Guard on the NORMALIZED value: checking the raw input let values such as
    -- ' 4' or '-7' through and produced malformed 6/7-character "divisions".
    IF v_code IS NULL OR length(v_code) < 2 THEN
        RETURN NULL;
    END IF;
    RETURN substr(v_code, 1, 2) || '000000';
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
|
||||
|
||||
|
||||
-- Get name_ro for a code, fallback to division name, fallback to code itself.
--   1. exact match on the normalized code in seap.cpv_codes
--   2. otherwise the name of the code's division row
--   3. otherwise the input value, unchanged
CREATE OR REPLACE FUNCTION seap.cpv_name(code TEXT)
RETURNS TEXT AS $$
DECLARE
    result TEXT;
BEGIN
    -- The column is referenced as c.code on purpose: the function parameter is
    -- also called "code", and an unqualified reference is rejected by PL/pgSQL
    -- as ambiguous (variable vs. column) under the default variable_conflict
    -- setting.
    SELECT c.name_ro INTO result
    FROM seap.cpv_codes AS c
    WHERE c.code = seap.cpv_normalize($1);
    IF result IS NOT NULL THEN RETURN result; END IF;

    SELECT c.name_ro INTO result
    FROM seap.cpv_codes AS c
    WHERE c.code = seap.cpv_division($1);
    IF result IS NOT NULL THEN RETURN result; END IF;

    RETURN $1;
END;
$$ LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
-- Division-level rows (level = 1) of the CPV nomenclature: code, Romanian
-- name and emoji. Join on seap.cpv_division(<any code>) to label any code.
CREATE OR REPLACE VIEW seap.cpv_division_lookup AS
SELECT
    c.code    AS division_code,
    c.name_ro AS division_name,
    c.emoji
FROM seap.cpv_codes AS c
WHERE c.level = 1;
|
||||
|
||||
|
||||
-- Denormalized CPV columns on announcements (division code + Romanian name)
-- so list pages can filter and label rows without joining seap.cpv_codes.
ALTER TABLE seap.announcements
    ADD COLUMN IF NOT EXISTS cpv_division TEXT,
    ADD COLUMN IF NOT EXISTS cpv_name_ro  TEXT;

CREATE INDEX IF NOT EXISTS idx_ann_cpv_division
    ON seap.announcements (cpv_division);
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,233 @@
|
||||
-- Risk flags (red flags) for procurement transparency, based on OCP indicators.
|
||||
-- Idempotent: safe to re-run.
|
||||
BEGIN;
|
||||
|
||||
-- ── Risk-flag storage on announcements ──
-- NOTE(review): presumably filled from seap.compute_announcement_flags() —
-- confirm against the writer. The partial index below calls
-- jsonb_array_length(), which raises for non-array JSONB values.
ALTER TABLE seap.announcements
    ADD COLUMN IF NOT EXISTS risk_flags JSONB;

-- GIN index restricted to rows that carry at least one flag.
CREATE INDEX IF NOT EXISTS idx_ann_risk_flags
    ON seap.announcements
    USING gin (risk_flags)
    WHERE risk_flags IS NOT NULL
      AND jsonb_array_length(risk_flags) > 0;
|
||||
|
||||
|
||||
-- ── Materialized view: awarded-value statistics per CPV division ──
-- Median, mean and 95th percentile of positive awarded values. Divisions with
-- fewer than 5 priced announcements are left out.
DROP MATERIALIZED VIEW IF EXISTS seap.mv_cpv_median_value CASCADE;

CREATE MATERIALIZED VIEW seap.mv_cpv_median_value AS
WITH priced AS (
    -- Only rows with a division and a strictly positive awarded value.
    SELECT
        ann.cpv_division,
        ann.awarded_value
    FROM seap.announcements AS ann
    WHERE ann.cpv_division IS NOT NULL
      AND ann.awarded_value IS NOT NULL
      AND ann.awarded_value > 0
)
SELECT
    p.cpv_division,
    count(*)::int                                                               AS contracts,
    percentile_cont(0.5)  WITHIN GROUP (ORDER BY p.awarded_value)::numeric(15,2) AS median_value,
    avg(p.awarded_value)::numeric(15,2)                                         AS avg_value,
    percentile_cont(0.95) WITHIN GROUP (ORDER BY p.awarded_value)::numeric(15,2) AS p95_value
FROM priced AS p
GROUP BY p.cpv_division
HAVING count(*) >= 5;

-- Unique index: required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_cpv_median_pk
    ON seap.mv_cpv_median_value (cpv_division);
|
||||
|
||||
|
||||
-- ── Materialized view: authority supplier concentration ──
-- For every (authority, calendar year) within the trailing 36 months: the
-- supplier with the largest awarded value and its share of the authority's
-- total for that year. Authority-years below 100,000 in total are dropped.
-- NOTE(review): the 36-month cutoff generally falls mid-year, so the oldest
-- calendar year in the view is only partially covered.
DROP MATERIALIZED VIEW IF EXISTS seap.mv_authority_concentration CASCADE;

CREATE MATERIALIZED VIEW seap.mv_authority_concentration AS
WITH yearly_pairs AS (
    -- Awarded value per (authority, year, supplier).
    SELECT
        a.authority_cui,
        MIN(a.authority_name)                      AS authority_name,
        EXTRACT(YEAR FROM a.publication_date)::int AS year,
        a.supplier_cui,
        MIN(a.supplier_name)                       AS supplier_name,
        SUM(a.awarded_value)::numeric(15,2)        AS total_value,
        COUNT(*)::int                              AS contracts
    FROM seap.announcements a
    WHERE a.authority_cui IS NOT NULL
      AND a.supplier_cui IS NOT NULL
      AND a.awarded_value IS NOT NULL
      AND a.awarded_value > 0
      AND a.publication_date IS NOT NULL
      AND a.publication_date >= now() - interval '36 months'
    GROUP BY a.authority_cui, EXTRACT(YEAR FROM a.publication_date), a.supplier_cui
),
yearly_totals AS (
    -- Awarded value per (authority, year) across all suppliers.
    SELECT
        authority_cui,
        year,
        SUM(total_value) AS year_total,
        SUM(contracts)   AS year_contracts
    FROM yearly_pairs
    GROUP BY authority_cui, year
),
ranked AS (
    SELECT
        p.authority_cui,
        p.authority_name,
        p.year,
        p.supplier_cui,
        p.supplier_name,
        p.total_value,
        p.contracts,
        t.year_total,
        t.year_contracts,
        -- supplier_cui breaks ties so that equal totals always pick the same
        -- "top" supplier on every refresh.
        ROW_NUMBER() OVER (
            PARTITION BY p.authority_cui, p.year
            ORDER BY p.total_value DESC, p.supplier_cui
        ) AS rn,
        (p.total_value / NULLIF(t.year_total, 0))::numeric(6,4) AS share
    FROM yearly_pairs p
    JOIN yearly_totals t USING (authority_cui, year)
)
SELECT
    authority_cui,
    authority_name,
    year,
    supplier_cui  AS top_supplier_cui,
    supplier_name AS top_supplier_name,
    total_value   AS top_supplier_value,
    contracts     AS top_supplier_contracts,
    year_total,
    year_contracts,
    share         AS top_supplier_share
FROM ranked
WHERE rn = 1
  AND year_total >= 100000;  -- skip tiny totals (noise)

-- Unique index: required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_auth_conc_pk
    ON seap.mv_authority_concentration(authority_cui, year);
CREATE INDEX IF NOT EXISTS idx_mv_auth_conc_share
    ON seap.mv_authority_concentration(top_supplier_share DESC NULLS LAST);
|
||||
|
||||
|
||||
-- ── View: single-bidder contracts ──
-- Announcements of type 'ca_notice' that either report exactly one offer or
-- list exactly one entry in details->'all_winners'. Exposes every column of
-- seap.announcements.
DROP VIEW IF EXISTS seap.v_single_bidder CASCADE;

CREATE VIEW seap.v_single_bidder AS
SELECT ann.*
FROM seap.announcements AS ann
WHERE ann.type = 'ca_notice'
  AND (
        ann.num_offers = 1
        OR (
            ann.details IS NOT NULL
            AND jsonb_typeof(ann.details -> 'all_winners') = 'array'
            AND jsonb_array_length(ann.details -> 'all_winners') = 1
        )
  );
|
||||
|
||||
|
||||
-- ── Function: compute risk flags for a single announcement ──
-- Returns a JSONB array of { code, severity, label, detail? } objects; the
-- array is empty when nothing is flagged, and the result is NULL when no
-- announcement has id = p_id.
-- Flags:
--   single_bidder      (high)   ca_notice with num_offers = 1, or with exactly
--                               one entry in details->'all_winners'
--   short_deadline     (medium) c_notice / rfq_invitation with less than 10
--                               days between publication and submission
--                               deadline; detail = gap in days
--   suspicious_savings (medium) awarded value below 50% of the estimate;
--                               detail = savings in whole percent
--   overprice          (medium) awarded value above twice the CPV-division
--                               median from seap.mv_cpv_median_value;
--                               detail = awarded / median, one decimal
CREATE OR REPLACE FUNCTION seap.compute_announcement_flags(
    p_id BIGINT
) RETURNS JSONB
LANGUAGE plpgsql
AS $$
DECLARE
    ann          RECORD;
    v_flags      JSONB := '[]'::jsonb;
    v_label      TEXT;
    v_cpv_median NUMERIC;
BEGIN
    SELECT src.id, src.type, src.publication_date, src.deadline_submission,
           src.awarded_value, src.estimated_value, src.cpv_division,
           src.num_offers, src.details
      INTO ann
      FROM seap.announcements AS src
     WHERE src.id = p_id;

    IF NOT FOUND THEN
        RETURN NULL;
    END IF;

    -- 1) Single bidder (only meaningful for ca_notice with winner data).
    --    The offer count takes precedence over the winners list.
    IF ann.type = 'ca_notice' THEN
        v_label := CASE
            WHEN ann.num_offers = 1 THEN 'Un singur ofertant'
            WHEN ann.details IS NOT NULL
                 AND jsonb_typeof(ann.details->'all_winners') = 'array'
                 AND jsonb_array_length(ann.details->'all_winners') = 1 THEN 'Un singur câștigător'
        END;
        IF v_label IS NOT NULL THEN
            v_flags := v_flags || jsonb_build_object(
                'code', 'single_bidder',
                'severity', 'high',
                'label', v_label
            );
        END IF;
    END IF;

    -- 2) Short deadline (only c_notice / rfq_invitation have submission deadlines)
    IF ann.type IN ('c_notice','rfq_invitation')
       AND ann.publication_date IS NOT NULL
       AND ann.deadline_submission IS NOT NULL
       AND (ann.deadline_submission - ann.publication_date) < interval '10 days' THEN
        v_flags := v_flags || jsonb_build_object(
            'code', 'short_deadline',
            'severity', 'medium',
            'label', 'Termen scurt',
            'detail', EXTRACT(EPOCH FROM (ann.deadline_submission - ann.publication_date))/86400.0
        );
    END IF;

    -- 3) Suspicious savings: awarded_value < 50% of estimated
    IF ann.awarded_value IS NOT NULL
       AND ann.estimated_value IS NOT NULL
       AND ann.awarded_value > 0
       AND ann.estimated_value > 0
       AND ann.awarded_value < 0.5 * ann.estimated_value THEN
        v_flags := v_flags || jsonb_build_object(
            'code', 'suspicious_savings',
            'severity', 'medium',
            'label', 'Economii suspecte',
            'detail', round(100.0 * (1 - ann.awarded_value / ann.estimated_value))::int
        );
    END IF;

    -- 4) Overprice: awarded_value > 2 * median per CPV division
    IF ann.awarded_value IS NOT NULL
       AND ann.awarded_value > 0
       AND ann.cpv_division IS NOT NULL THEN
        SELECT m.median_value
          INTO v_cpv_median
          FROM seap.mv_cpv_median_value AS m
         WHERE m.cpv_division = ann.cpv_division;

        -- No row (division below the sample threshold) leaves the median NULL
        -- and the comparison not true, so nothing is flagged.
        IF v_cpv_median IS NOT NULL AND v_cpv_median > 0
           AND ann.awarded_value > 2 * v_cpv_median THEN
            v_flags := v_flags || jsonb_build_object(
                'code', 'overprice',
                'severity', 'medium',
                'label', 'Peste piață',
                'detail', round((ann.awarded_value / v_cpv_median)::numeric, 1)
            );
        END IF;
    END IF;

    RETURN v_flags;
END;
$$;
|
||||
|
||||
|
||||
-- ── Function: refresh the CPV median view used by the risk flags ──
-- Refreshes seap.mv_cpv_median_value only; the concentration view has its own
-- helper, seap.refresh_concentration().
-- Tries a CONCURRENTLY refresh first (readers are not blocked) and falls back
-- to a plain, blocking refresh if that fails for any reason. The fallback is
-- reported with a WARNING instead of being swallowed silently.
CREATE OR REPLACE FUNCTION seap.refresh_risk_views()
RETURNS VOID
LANGUAGE plpgsql
AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_cpv_median_value;
EXCEPTION WHEN OTHERS THEN
    RAISE WARNING 'seap.refresh_risk_views: concurrent refresh failed (% – %), falling back to blocking refresh',
        SQLSTATE, SQLERRM;
    REFRESH MATERIALIZED VIEW seap.mv_cpv_median_value;
END;
$$;
|
||||
|
||||
-- Refresh seap.mv_authority_concentration.
-- Tries a CONCURRENTLY refresh first (readers are not blocked) and falls back
-- to a plain, blocking refresh if that fails for any reason. The fallback is
-- reported with a WARNING instead of being swallowed silently.
CREATE OR REPLACE FUNCTION seap.refresh_concentration()
RETURNS VOID
LANGUAGE plpgsql
AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_authority_concentration;
EXCEPTION WHEN OTHERS THEN
    RAISE WARNING 'seap.refresh_concentration: concurrent refresh failed (% – %), falling back to blocking refresh',
        SQLSTATE, SQLERRM;
    REFRESH MATERIALIZED VIEW seap.mv_authority_concentration;
END;
$$;
|
||||
|
||||
COMMIT;
|
||||
|
||||
-- Initial population (non-transactional)
|
||||
REFRESH MATERIALIZED VIEW seap.mv_cpv_median_value;
|
||||
REFRESH MATERIALIZED VIEW seap.mv_authority_concentration;
|
||||
@@ -0,0 +1,94 @@
|
||||
-- Per-UAT KPI materialized view powering /harta v2 multi-metric choropleth.
|
||||
-- Columns:
|
||||
-- total_contracts, total_value, distinct_suppliers
|
||||
-- direct_pct — share of value awarded via direct procurement (type='da')
|
||||
-- framework_pct — share via framework agreements
|
||||
-- hhi_suppliers — Herfindahl-Hirschman index 0..10000 (DOJ thresholds: <1500 ok, 1500-2500 moderate, >2500 concentrated)
|
||||
-- top_supplier_share — biggest single-supplier dependency 0..1
|
||||
-- q4_spike — Q4 value / (yearly_avg_quarter) for last full year; >1.5 = spike, NULL if no data
|
||||
--
|
||||
-- Refresh: weekly cron — REFRESH MATERIALIZED VIEW CONCURRENTLY seap.uat_kpi;
|
||||
-- Idempotent: safe to re-run.
|
||||
|
||||
BEGIN;
|
||||
|
||||
DROP MATERIALIZED VIEW IF EXISTS seap.uat_kpi CASCADE;

-- One row per UAT (authority_siruta) that has at least one announcement.
CREATE MATERIALIZED VIEW seap.uat_kpi AS
WITH awards AS (
    -- Announcements attributable to a UAT, reduced to the columns used below.
    SELECT
        ann.authority_siruta AS siruta,
        ann.supplier_cui,
        ann.type,
        ann.awarded_value,
        ann.publication_date,
        ann.framework_agreement
    FROM seap.announcements AS ann
    WHERE ann.authority_siruta IS NOT NULL
),
totals AS (
    -- Volume and value per UAT, with the direct / framework sub-totals.
    SELECT
        siruta,
        COUNT(*)::int                                                                              AS total_contracts,
        COALESCE(SUM(awarded_value), 0)::numeric(20,2)                                             AS total_value,
        COALESCE(SUM(awarded_value) FILTER (WHERE type = 'da'), 0)::numeric(20,2)                  AS direct_value,
        COALESCE(SUM(awarded_value) FILTER (WHERE framework_agreement = true), 0)::numeric(20,2)   AS framework_value,
        COUNT(DISTINCT supplier_cui)::int                                                          AS distinct_suppliers
    FROM awards
    GROUP BY siruta
),
shares AS (
    -- Each supplier's fraction of the UAT's awarded value (NULL when the UAT
    -- total is zero).
    SELECT
        siruta,
        supplier_cui,
        SUM(awarded_value)
            / NULLIF(SUM(SUM(awarded_value)) OVER (PARTITION BY siruta), 0) AS ratio
    FROM awards
    WHERE supplier_cui IS NOT NULL
      AND awarded_value IS NOT NULL
    GROUP BY siruta, supplier_cui
),
concentration AS (
    -- Herfindahl-Hirschman index on a 0..10000 scale and the largest share.
    SELECT
        siruta,
        COALESCE(SUM(POWER(ratio, 2)) * 10000, 0) AS hhi,
        COALESCE(MAX(ratio), 0)                   AS top_supplier_share
    FROM shares
    GROUP BY siruta
),
prior_year AS (
    -- Q4 and whole-year awarded value for the last complete calendar year.
    SELECT
        siruta,
        COALESCE(SUM(awarded_value) FILTER (WHERE EXTRACT(QUARTER FROM publication_date) = 4), 0)::numeric AS q4_value,
        COALESCE(SUM(awarded_value), 0)::numeric                                                           AS yearly_value
    FROM awards
    WHERE EXTRACT(YEAR FROM publication_date) = EXTRACT(YEAR FROM now()) - 1
    GROUP BY siruta
)
SELECT
    t.siruta,
    t.total_contracts,
    t.total_value,
    t.distinct_suppliers,
    CASE WHEN t.total_value > 0 THEN t.direct_value / t.total_value ELSE 0 END    AS direct_pct,
    CASE WHEN t.total_value > 0 THEN t.framework_value / t.total_value ELSE 0 END AS framework_pct,
    COALESCE(c.hhi, 0)::numeric(10,2)                                             AS hhi_suppliers,
    COALESCE(c.top_supplier_share, 0)::numeric(8,4)                               AS top_supplier_share,
    -- Q4 value relative to an average quarter of that year; NULL without data.
    CASE WHEN y.yearly_value > 0 THEN y.q4_value / (y.yearly_value / 4) END       AS q4_spike
FROM totals AS t
LEFT JOIN concentration AS c ON c.siruta = t.siruta
LEFT JOIN prior_year    AS y ON y.siruta = t.siruta;

-- Unique index: required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_uat_kpi_pk
    ON seap.uat_kpi (siruta);
CREATE INDEX IF NOT EXISTS idx_uat_kpi_value
    ON seap.uat_kpi (total_value DESC NULLS LAST);
-- Ranking indexes skip UATs with 5 or fewer announcements.
CREATE INDEX IF NOT EXISTS idx_uat_kpi_direct
    ON seap.uat_kpi (direct_pct DESC) WHERE total_contracts > 5;
CREATE INDEX IF NOT EXISTS idx_uat_kpi_hhi
    ON seap.uat_kpi (hhi_suppliers DESC) WHERE total_contracts > 5;
|
||||
|
||||
COMMIT;
|
||||
|
||||
-- Refresh helper: rebuilds seap.uat_kpi with CONCURRENTLY, so the view must
-- already be populated and keep its unique index.
CREATE OR REPLACE FUNCTION seap.refresh_uat_kpi()
RETURNS void
LANGUAGE sql
AS $$
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.uat_kpi;
$$;
|
||||
@@ -0,0 +1,58 @@
|
||||
-- Full-text search infrastructure for /api/cauta and /achizitii/cauta.
|
||||
-- Uses 'simple' config + unaccent for diacritic-insensitive matching. PostgreSQL
|
||||
-- does ship a stemming 'romanian' config, but we don't want stemming bias here.
|
||||
--
|
||||
-- Idempotent: safe to re-run.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Ensure unaccent extension
|
||||
CREATE EXTENSION IF NOT EXISTS unaccent;
|
||||
|
||||
-- IMMUTABLE wrapper around public.unaccent() so it can be used in expression
-- indexes and generated columns. The dictionary is named explicitly
-- ('public.unaccent') instead of being resolved through search_path.
-- Safe because we don't reload the unaccent dictionary at runtime.
CREATE OR REPLACE FUNCTION seap.immutable_unaccent(text)
RETURNS text
LANGUAGE sql
IMMUTABLE
STRICT
PARALLEL SAFE
AS $$
    SELECT public.unaccent('public.unaccent', $1)
$$;
|
||||
|
||||
-- Plain (non-generated) tsvector column, maintained by the trigger function
-- below.
ALTER TABLE seap.announcements
    ADD COLUMN IF NOT EXISTS search_tsv tsvector;

-- Trigger function: rebuilds NEW.search_tsv from the row's text fields using
-- the 'simple' configuration on unaccented text. Weights:
--   A = title, B = description, C = authority and supplier names,
--   D = CPV names (cpv_name_ro, cpv_name).
CREATE OR REPLACE FUNCTION seap.update_search_tsv() RETURNS trigger
LANGUAGE plpgsql AS $$
DECLARE
    v_doc tsvector;
BEGIN
    v_doc := setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.title, ''))), 'A');
    v_doc := v_doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.description, ''))), 'B');
    v_doc := v_doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.authority_name, ''))), 'C');
    v_doc := v_doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.supplier_name, ''))), 'C');
    v_doc := v_doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.cpv_name_ro, ''))), 'D');
    v_doc := v_doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.cpv_name, ''))), 'D');

    NEW.search_tsv := v_doc;
    RETURN NEW;
END $$;
|
||||
|
||||
-- Recreate the trigger that keeps search_tsv current. It fires on INSERT and
-- only on UPDATEs that touch one of the indexed text columns.
DROP TRIGGER IF EXISTS trg_announcements_search_tsv
    ON seap.announcements;

CREATE TRIGGER trg_announcements_search_tsv
    BEFORE INSERT
        OR UPDATE OF title, description, authority_name, supplier_name, cpv_name_ro, cpv_name
    ON seap.announcements
    FOR EACH ROW
    EXECUTE FUNCTION seap.update_search_tsv();

-- Full-text index over the weighted document.
CREATE INDEX IF NOT EXISTS idx_ann_search_tsv
    ON seap.announcements
    USING gin (search_tsv);

-- Title-only trgm for "starts-with" or substring autocompletes
CREATE INDEX IF NOT EXISTS idx_ann_title_trgm
    ON seap.announcements
    USING gin (title gin_trgm_ops);
|
||||
|
||||
COMMIT;
|
||||
|
||||
-- Backfill rows that pre-date the trigger (run outside the transaction).
-- Long-running on 642K rows but does NOT block reads. Only rows whose
-- search_tsv is still NULL are touched, so the statement can be re-run.
-- The expression mirrors seap.update_search_tsv(); keep the two in sync.
UPDATE seap.announcements AS ann
SET search_tsv =
       setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(ann.title, ''))), 'A')
    || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(ann.description, ''))), 'B')
    || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(ann.authority_name, ''))), 'C')
    || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(ann.supplier_name, ''))), 'C')
    || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(ann.cpv_name_ro, ''))), 'D')
    || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(ann.cpv_name, ''))), 'D')
WHERE ann.search_tsv IS NULL;
|
||||
@@ -0,0 +1,165 @@
|
||||
-- Materialized views for slow /achizitii/retete pages.
|
||||
-- Refresh nightly via vreaudigital-mvs.timer.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────
-- mv_top_cpv_divisions: powers /retete/top-categorii-bani + cpv-directe-mari
-- One row per CPV division with awarded totals and the share of value that
-- went through direct procurement (type = 'da').
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_top_cpv_divisions CASCADE;

CREATE MATERIALIZED VIEW seap.mv_top_cpv_divisions AS
WITH per_division AS (
    -- Raw aggregates; casting and the ratio are applied in the outer query.
    SELECT
        ann.cpv_division,
        cpv.name_ro                                              AS cpv_name,
        cpv.emoji,
        COUNT(*)                                                 AS n_rows,
        SUM(ann.awarded_value)                                   AS sum_all,
        SUM(ann.awarded_value) FILTER (WHERE ann.type = 'da')    AS sum_direct,
        COUNT(DISTINCT ann.authority_cui)                        AS n_authorities,
        COUNT(DISTINCT ann.supplier_cui)                         AS n_suppliers
    FROM seap.announcements AS ann
    LEFT JOIN seap.cpv_codes AS cpv
           ON cpv.code = ann.cpv_division
    WHERE ann.cpv_division IS NOT NULL
      AND ann.awarded_value IS NOT NULL
    GROUP BY ann.cpv_division, cpv.name_ro, cpv.emoji
)
SELECT
    d.cpv_division,
    d.cpv_name,
    d.emoji,
    d.n_rows::int                               AS contracts,
    COALESCE(d.sum_all, 0)::numeric(20,2)       AS total_value,
    COALESCE(d.sum_direct, 0)::numeric(20,2)    AS direct_value,
    d.n_authorities::int                        AS distinct_authorities,
    d.n_suppliers::int                          AS distinct_suppliers,
    CASE WHEN COALESCE(d.sum_all, 0) > 0
         THEN COALESCE(d.sum_direct, 0) / d.sum_all
         ELSE 0
    END::numeric(8,4)                           AS direct_pct
FROM per_division AS d;

-- Unique index: required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_top_cpv_div_pk
    ON seap.mv_top_cpv_divisions (cpv_division);
CREATE INDEX IF NOT EXISTS idx_mv_top_cpv_div_value
    ON seap.mv_top_cpv_divisions (total_value DESC NULLS LAST);
-- Direct-share ranking restricted to divisions worth at least 100,000,000.
CREATE INDEX IF NOT EXISTS idx_mv_top_cpv_div_directpct
    ON seap.mv_top_cpv_divisions (direct_pct DESC) WHERE total_value >= 100000000;
|
||||
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────
-- mv_top_suppliers: powers /retete/top-firme-castigatoare + firme-multe-judete
-- One row per normalized supplier CUI (upper-cased, leading "RO" and all
-- whitespace removed) with a positive awarded total.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_top_suppliers CASCADE;

CREATE MATERIALIZED VIEW seap.mv_top_suppliers AS
WITH awarded AS (
    -- Awarded announcements with the supplier CUI normalized once.
    SELECT
        regexp_replace(upper(ann.supplier_cui), '(^RO)|\s+', '', 'g') AS cui_norm,
        ann.supplier_name,
        ann.authority_cui,
        ann.awarded_value
    FROM seap.announcements AS ann
    WHERE ann.supplier_cui IS NOT NULL
      AND ann.awarded_value IS NOT NULL
),
per_supplier AS (
    SELECT
        w.cui_norm,
        MIN(w.supplier_name)                              AS name,
        MIN(home.county)                                  AS county,          -- supplier's own county
        COUNT(*)::int                                     AS contracts,
        COALESCE(SUM(w.awarded_value), 0)::numeric(20,2)  AS total_value,
        COUNT(DISTINCT w.authority_cui)::int              AS distinct_buyers,
        COUNT(DISTINCT buyer.county)::int                 AS county_count     -- counties of the buying authorities
    FROM awarded AS w
    LEFT JOIN seap.cui_location AS home  ON home.cui  = w.cui_norm
    LEFT JOIN seap.cui_location AS buyer ON buyer.cui = w.authority_cui
    GROUP BY w.cui_norm
)
SELECT
    cui_norm,
    name,
    county,
    contracts,
    total_value,
    distinct_buyers,
    county_count
FROM per_supplier
WHERE total_value > 0;

-- Unique index: required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_top_suppliers_pk
    ON seap.mv_top_suppliers (cui_norm);
CREATE INDEX IF NOT EXISTS idx_mv_top_suppliers_value
    ON seap.mv_top_suppliers (total_value DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_top_suppliers_counties
    ON seap.mv_top_suppliers (county_count DESC NULLS LAST);
|
||||
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────
-- mv_top_authorities: powers /retete/top-autoritati-cheltuitori
-- One row per contracting authority CUI, with location taken from
-- seap.cui_location when a match exists.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_top_authorities CASCADE;

CREATE MATERIALIZED VIEW seap.mv_top_authorities AS
WITH awarded AS (
    SELECT
        ann.authority_cui,
        ann.authority_name,
        ann.authority_type,
        ann.supplier_cui,
        ann.awarded_value
    FROM seap.announcements AS ann
    WHERE ann.authority_cui IS NOT NULL
      AND ann.awarded_value IS NOT NULL
)
SELECT
    w.authority_cui,
    MIN(w.authority_name)                             AS name,
    MIN(loc.county)                                   AS county,
    MIN(w.authority_type)                             AS authority_type,
    MIN(loc.siruta)                                   AS siruta,
    COUNT(*)::int                                     AS contracts,
    COALESCE(SUM(w.awarded_value), 0)::numeric(20,2)  AS total_value,
    COUNT(DISTINCT w.supplier_cui)::int               AS distinct_suppliers
FROM awarded AS w
LEFT JOIN seap.cui_location AS loc
       ON loc.cui = w.authority_cui
GROUP BY w.authority_cui;

-- Unique index: required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_top_auth_pk
    ON seap.mv_top_authorities (authority_cui);
CREATE INDEX IF NOT EXISTS idx_mv_top_auth_value
    ON seap.mv_top_authorities (total_value DESC NULLS LAST);
|
||||
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────
-- mv_recurrent_pairs: powers /retete/perechi-recurente
-- (authority, normalized supplier) pairs with at least 5 awarded
-- announcements, plus the first and last publication year of the pair.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_recurrent_pairs CASCADE;

CREATE MATERIALIZED VIEW seap.mv_recurrent_pairs AS
WITH awarded AS (
    -- Awarded announcements with the supplier CUI normalized once
    -- (upper-cased, leading "RO" and all whitespace removed).
    SELECT
        ann.authority_cui,
        ann.authority_name,
        regexp_replace(upper(ann.supplier_cui), '(^RO)|\s+', '', 'g') AS supplier_cui_norm,
        ann.supplier_name,
        ann.awarded_value,
        ann.publication_date
    FROM seap.announcements AS ann
    WHERE ann.authority_cui IS NOT NULL
      AND ann.supplier_cui IS NOT NULL
      AND ann.awarded_value IS NOT NULL
)
SELECT
    w.authority_cui,
    MIN(w.authority_name)                             AS authority_name,
    w.supplier_cui_norm,
    MIN(w.supplier_name)                              AS supplier_name,
    MIN(loc.county)                                   AS county,      -- authority's county
    COUNT(*)::int                                     AS contracts,
    COALESCE(SUM(w.awarded_value), 0)::numeric(20,2)  AS total_value,
    MIN(EXTRACT(YEAR FROM w.publication_date))::int   AS first_year,
    MAX(EXTRACT(YEAR FROM w.publication_date))::int   AS last_year
FROM awarded AS w
LEFT JOIN seap.cui_location AS loc
       ON loc.cui = w.authority_cui
GROUP BY w.authority_cui, w.supplier_cui_norm
HAVING COUNT(*) >= 5;

-- Unique index: required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_recurr_pk
    ON seap.mv_recurrent_pairs (authority_cui, supplier_cui_norm);
CREATE INDEX IF NOT EXISTS idx_mv_recurr_value
    ON seap.mv_recurrent_pairs (total_value DESC NULLS LAST);
|
||||
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────
-- mv_supplier_cpv_share: powers /retete/firme-specializate-extrem
-- One row per supplier with at least 5,000,000 in awarded value: the CPV
-- division that brings in the most value and its share of the supplier total.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_supplier_cpv_share CASCADE;

CREATE MATERIALIZED VIEW seap.mv_supplier_cpv_share AS
WITH supplier_cpv AS (
    -- Awarded value per (normalized supplier CUI, CPV division).
    SELECT
        regexp_replace(upper(a.supplier_cui), '(^RO)|\s+', '', 'g') AS cui,
        MIN(a.supplier_name)                                        AS name,
        a.cpv_division,
        MIN(c.name_ro)                                              AS cpv_name,
        MIN(c.emoji)                                                AS emoji,
        COUNT(*)::int                                               AS contracts,
        COALESCE(SUM(a.awarded_value), 0)::numeric(20,2)            AS cpv_value
    FROM seap.announcements a
    LEFT JOIN seap.cpv_codes c ON c.code = a.cpv_division
    WHERE a.supplier_cui IS NOT NULL
      AND a.cpv_division IS NOT NULL
      AND a.awarded_value IS NOT NULL
    -- Grouped by the expression itself rather than by ordinal position.
    GROUP BY regexp_replace(upper(a.supplier_cui), '(^RO)|\s+', '', 'g'), a.cpv_division
),
supplier_total AS (
    -- Suppliers above the value threshold; this also guarantees a non-zero
    -- divisor for the share below.
    SELECT cui, SUM(cpv_value) AS total
    FROM supplier_cpv
    GROUP BY cui
    HAVING SUM(cpv_value) >= 5000000
),
ranked AS (
    SELECT
        sc.cui, sc.name, sc.cpv_division, sc.cpv_name, sc.emoji,
        sc.contracts, sc.cpv_value,
        st.total,
        (sc.cpv_value / st.total)::numeric(8,4) AS share,
        -- cpv_division breaks ties so equal values always pick the same
        -- division; without it the unique row per supplier could change
        -- between refreshes.
        ROW_NUMBER() OVER (
            PARTITION BY sc.cui
            ORDER BY sc.cpv_value DESC, sc.cpv_division
        ) AS rn
    FROM supplier_cpv sc
    JOIN supplier_total st ON st.cui = sc.cui
)
-- Explicit column list (same columns and order as before, including rn).
SELECT
    cui,
    name,
    cpv_division,
    cpv_name,
    emoji,
    contracts,
    cpv_value,
    total,
    share,
    rn
FROM ranked
WHERE rn = 1;

-- Unique index: required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_sup_cpv_pk ON seap.mv_supplier_cpv_share(cui);
CREATE INDEX IF NOT EXISTS idx_mv_sup_cpv_share ON seap.mv_supplier_cpv_share(share DESC, total DESC);
|
||||
|
||||
COMMIT;
|
||||
|
||||
-- Refresh helper: rebuilds the five recipe materialized views one after
-- another. Every refresh uses CONCURRENTLY, so each view must already be
-- populated and keep its unique index.
CREATE OR REPLACE FUNCTION seap.refresh_recipe_mvs()
RETURNS void
LANGUAGE sql
AS $$
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_cpv_divisions;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_suppliers;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_authorities;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_recurrent_pairs;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_supplier_cpv_share;
$$;
|
||||
@@ -0,0 +1,161 @@
|
||||
-- Firms registry — extends seap.cui_location with full ONRC + ANAF data
|
||||
-- for ALL Romanian companies (~1.5M), not just those active in SEAP.
|
||||
--
|
||||
-- Sources:
|
||||
-- ONRC bulk on data.gov.ro (CC-BY 4.0): COD_INMATRICULARE-keyed CSV files
|
||||
-- ANAF webservicesp v9: per-CUI enrichment (status, address, contacts)
|
||||
-- Photon (Komoot) self-hosted: address → lat/lng geocoding
|
||||
--
|
||||
-- Idempotent: safe to re-run.
|
||||
|
||||
BEGIN;
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS firms;
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────
-- Master firms table — one row per CUI (unique)
-- ──────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS firms.entities (
    -- ── Identity ──
    cui TEXT PRIMARY KEY,
    -- ONRC registration number, e.g. J40/630/1992.
    -- NOTE(review): the original note said "NULL for PFAs without CUI", but
    -- cui is the primary key here — confirm which rows leave this NULL.
    cod_inmatriculare TEXT,
    -- European identifier
    euid TEXT,
    name TEXT NOT NULL,
    -- SRL, SA, PFA, II, IF, etc.
    forma_juridica TEXT,

    -- ── Address (parsed from ONRC) ──
    adr_tara TEXT,
    adr_judet TEXT,
    adr_localitate TEXT,
    adr_strada TEXT,
    adr_numar TEXT,
    adr_bloc TEXT,
    adr_scara TEXT,
    adr_etaj TEXT,
    adr_apartament TEXT,
    adr_cod_postal TEXT,
    adr_sector TEXT,
    -- raw appendix
    adr_completare TEXT,
    -- concatenated, used for geocoding query
    adr_full TEXT,
    -- matched UAT siruta (joined with GisUat)
    siruta TEXT,

    -- ── Geolocation ──
    lat DOUBLE PRECISION,
    lng DOUBLE PRECISION,
    geom GEOGRAPHY(POINT, 4326),
    -- 'photon', 'nominatim', 'siruta_centroid', 'manual'
    geocode_source TEXT,
    -- 0..1 confidence
    geocode_score REAL,

    -- ── Registration ──
    data_inmatriculare DATE,
    registration_year INT,

    -- ── Status (from ANAF v9 + ONRC stare_firma) ──
    -- NULL = unknown, true = active, false = on the ANAF inactive-taxpayers list
    is_active_anaf BOOLEAN,
    -- ONRC stare_firma RADIATA (struck off)
    is_radiated_onrc BOOLEAN,
    -- ANAF scpTVA active
    is_vat_registered BOOLEAN,
    -- ANAF statusRO_e_Factura
    is_efactura BOOLEAN,
    -- decoded human-readable: "Activă", "Radiată", "Insolvență", etc.
    status_text TEXT,

    -- ── Contact (best-effort, often NULL) ──
    phone TEXT,
    fax TEXT,
    -- from ONRC OD_FIRME.CSV.WEB column
    web TEXT,

    -- ── Activity classification ──
    -- CAEN code from ANAF
    caen_principal TEXT,
    -- multi-row aggregate from OD_CAEN_AUTORIZAT.CSV
    caen_autorizate TEXT[],

    -- ── Foreign parent ──
    -- from ONRC OD_FIRME.CSV.TARA_FIRMA_MAMA
    tara_firma_mama TEXT,

    -- ── Ownership / management (from ONRC reprezentanti) ──
    -- [{persoana, calitate, judet_localitate, tara}, ...]
    rep_legali JSONB,

    -- ── Metadata ──
    -- e.g. 'firme-03-04-2026'
    source_onrc_dataset TEXT,
    anaf_fetched_at TIMESTAMPTZ,
    onrc_fetched_at TIMESTAMPTZ,
    geocoded_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now()
);

-- Lookup indexes. The trigram index needs pg_trgm; geom needs PostGIS.
CREATE INDEX IF NOT EXISTS idx_firms_cod_inmatriculare
    ON firms.entities (cod_inmatriculare)
    WHERE cod_inmatriculare IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_firms_county
    ON firms.entities (adr_judet);
CREATE INDEX IF NOT EXISTS idx_firms_siruta
    ON firms.entities (siruta)
    WHERE siruta IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_firms_caen_principal
    ON firms.entities (caen_principal);
CREATE INDEX IF NOT EXISTS idx_firms_geom
    ON firms.entities USING gist (geom);
CREATE INDEX IF NOT EXISTS idx_firms_name_trgm
    ON firms.entities USING gin (name gin_trgm_ops);
-- Partial index over firms that are ANAF-active and not struck off at ONRC.
CREATE INDEX IF NOT EXISTS idx_firms_active
    ON firms.entities (is_active_anaf, is_radiated_onrc)
    WHERE is_active_anaf = true AND (is_radiated_onrc = false OR is_radiated_onrc IS NULL);
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────
-- Staging tables for raw ONRC CSV imports (truncated each refresh)
-- Every column is TEXT: values are loaded as-is and typed later.
-- ──────────────────────────────────────────────────────────────────

-- Company master rows.
CREATE TABLE IF NOT EXISTS firms.staging_onrc_firme (
    denumire TEXT,
    cui TEXT,
    cod_inmatriculare TEXT,
    -- YYYY-MM-DD or empty
    data_inmatriculare TEXT,
    euid TEXT,
    forma_juridica TEXT,
    adr_tara TEXT,
    adr_judet TEXT,
    adr_localitate TEXT,
    adr_strada TEXT,
    adr_numar TEXT,
    adr_bloc TEXT,
    adr_scara TEXT,
    adr_etaj TEXT,
    adr_apartament TEXT,
    adr_cod_postal TEXT,
    adr_sector TEXT,
    adr_completare TEXT,
    web TEXT,
    tara_firma_mama TEXT
);

-- Authorized CAEN codes, keyed by registration number.
CREATE TABLE IF NOT EXISTS firms.staging_onrc_caen (
    cod_inmatriculare TEXT,
    cod_caen TEXT,
    ver_caen TEXT
);

-- Company status codes, keyed by registration number.
CREATE TABLE IF NOT EXISTS firms.staging_onrc_stare (
    cod_inmatriculare TEXT,
    cod_stare TEXT
);

-- Legal representatives, keyed by registration number.
CREATE TABLE IF NOT EXISTS firms.staging_onrc_reprezentanti (
    cod_inmatriculare TEXT,
    persoana TEXT,
    calitate TEXT,
    data_nastere TEXT,
    localitate_nastere TEXT,
    judet_nastere TEXT,
    tara_nastere TEXT,
    localitate TEXT,
    judet TEXT,
    tara TEXT
);
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────
-- Company-status codelist (manually populated — short list, ~10 codes)
-- ──────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS firms.stare_codelist (
    cod TEXT PRIMARY KEY,
    label TEXT NOT NULL
);

-- Seed rows; existing codes are left untouched, so re-running is safe.
INSERT INTO firms.stare_codelist (cod, label)
VALUES
    ('1', 'Activă'),
    ('2', 'Suspendată activitate'),
    ('3', 'Dizolvare'),
    ('4', 'Radiată'),
    ('5', 'În lichidare'),
    ('6', 'Insolvență'),
    ('7', 'Reorganizare judiciară'),
    ('8', 'Faliment'),
    ('9', 'Întreruptă activitate')
ON CONFLICT (cod) DO NOTHING;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,75 @@
|
||||
-- Financial indicators per firm-year, from Ministerul Finanțelor "Situații financiare"
|
||||
-- annual datasets on data.gov.ro (CC-BY 4.0).
|
||||
--
|
||||
-- 21 indicators (I1-I20 + CAEN) extracted from balance sheet + P&L + headcount.
|
||||
-- Schema covers years 2020-2024 initially; older years available too if needed.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- One row per (company, fiscal year) holding indicators I1..I20 plus the
-- CAEN code. Amounts are exact decimals; the Ix tag on each column names the
-- source indicator it is loaded from.
create table if not exists firms.financials (
    cui                 text    not null,
    year                int     not null,
    caen                text,

    -- Balance sheet: assets
    active_imobilizate  numeric(20,2),  -- I1
    active_circulante   numeric(20,2),  -- I2
    stocuri             numeric(20,2),  -- I3
    creante             numeric(20,2),  -- I4
    casa_banci          numeric(20,2),  -- I5
    cheltuieli_avans    numeric(20,2),  -- I6

    -- Balance sheet: liabilities / equity
    datorii             numeric(20,2),  -- I7
    venituri_avans      numeric(20,2),  -- I8
    provizioane         numeric(20,2),  -- I9
    capitaluri_total    numeric(20,2),  -- I10
    capital_subscris    numeric(20,2),  -- I11
    patrimoniul_regiei  numeric(20,2),  -- I12

    -- Profit and loss account
    cifra_afaceri       numeric(20,2),  -- I13 (net turnover)
    venituri_total      numeric(20,2),  -- I14
    cheltuieli_total    numeric(20,2),  -- I15
    profit_brut         numeric(20,2),  -- I16
    pierdere_bruta      numeric(20,2),  -- I17
    profit_net          numeric(20,2),  -- I18
    pierdere_neta       numeric(20,2),  -- I19

    -- Headcount
    numar_salariati     bigint,         -- I20; bigint because anomalous source values need the wider range

    -- Provenance
    source              text        default 'mfinante.data.gov.ro',
    fetched_at          timestamptz default now(),

    primary key (cui, year)
);
|
||||
|
||||
-- Secondary indexes on firms.financials.
--
-- No standalone index on (cui): the primary key on (cui, year) is a b-tree
-- whose leading column is cui, so it already serves lookups by cui. A second
-- single-column index only adds write and storage cost on every bulk load.
-- Drop it where an earlier run of this migration created it.
DROP INDEX IF EXISTS firms.idx_fin_cui;

CREATE INDEX IF NOT EXISTS idx_fin_year ON firms.financials(year);

-- Per-year rankings (top turnover / profit / headcount); NULLs sort last so
-- firms with a missing value never lead a ranking.
CREATE INDEX IF NOT EXISTS idx_fin_ca_desc ON firms.financials(year, cifra_afaceri DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_fin_profit_desc ON firms.financials(year, profit_net DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_fin_salariati_desc ON firms.financials(year, numar_salariati DESC NULLS LAST);

CREATE INDEX IF NOT EXISTS idx_fin_caen ON firms.financials(caen);
|
||||
|
||||
-- Most recent fiscal year per company, precomputed for profile lookups.
-- DISTINCT ON keeps the first row of each cui group; ordering by year DESC
-- makes that row the latest year.
create materialized view if not exists firms.mv_financials_latest as
select distinct on (f.cui)
    f.*
from firms.financials as f
where f.cui is not null
order by
    f.cui,
    f.year desc;

-- The unique index gives one-row lookups by cui and is a prerequisite for
-- REFRESH MATERIALIZED VIEW CONCURRENTLY.
create unique index if not exists idx_mv_fin_latest_pk
    on firms.mv_financials_latest (cui);
|
||||
|
||||
-- Landing table for the raw financial CSV rows: company id, CAEN code and the
-- twenty indicators i1..i20 as unconstrained numerics.
-- It has no year column; the target firms.financials requires one.
-- NOTE(review): the year is presumably supplied by the loader per file — confirm.
create table if not exists firms.staging_financials (
    cui     text,
    caen    text,
    i1  numeric, i2  numeric, i3  numeric, i4  numeric, i5  numeric,
    i6  numeric, i7  numeric, i8  numeric, i9  numeric, i10 numeric,
    i11 numeric, i12 numeric, i13 numeric, i14 numeric, i15 numeric,
    i16 numeric, i17 numeric, i18 numeric, i19 numeric, i20 numeric
);
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,46 @@
|
||||
-- 014_firms_postal_codes.sql
|
||||
-- GeoNames RO postal codes (37915 entries, CC-BY 4.0).
|
||||
-- Used for fast batch geocoding of firms.entities at postal-code precision
|
||||
-- — covers ~2.07M firms (52%) with adr_cod_postal populated.
|
||||
-- Source: https://download.geonames.org/export/zip/RO.zip
|
||||
-- Refresh: yearly via cron (data updates ~yearly per GeoNames).
|
||||
|
||||
-- Postal code gazetteer. A postal code can map to several places, so the key
-- is the (postal_code, place_name) pair; coordinates are mandatory.
CREATE TABLE IF NOT EXISTS firms.postal_codes (
    postal_code     TEXT            NOT NULL,
    place_name      TEXT            NOT NULL,
    county          TEXT,
    county_code     TEXT,
    admin2_code     TEXT,
    admin3_code     TEXT,
    admin3_name     TEXT,
    lat             NUMERIC(9,6)    NOT NULL,
    lng             NUMERIC(9,6)    NOT NULL,
    accuracy        INT,
    PRIMARY KEY (postal_code, place_name)
);
|
||||
|
||||
-- One row per postal code. When several places share a code, keep the row
-- with the most precise coordinates.
--
-- GeoNames documents the accuracy column as an ASCENDING scale
-- (1 = estimated, 4 = geonameid, 6 = centroid of addresses or shape), so the
-- HIGHEST value is the most precise and must sort first. The previous
-- ascending sort selected the least precise row for every shared code.
-- Rows without an accuracy sort last; place_name makes ties deterministic.
CREATE OR REPLACE VIEW firms.postal_codes_best AS
SELECT DISTINCT ON (postal_code)
    postal_code,
    place_name,
    county,
    county_code,
    lat,
    lng,
    accuracy
FROM firms.postal_codes
ORDER BY
    postal_code,
    accuracy DESC NULLS LAST,
    place_name;
|
||||
|
||||
-- No standalone index on postal_code: the primary key on
-- (postal_code, place_name) already leads with postal_code and serves the
-- same lookups. Remove the duplicate where an earlier run created it.
DROP INDEX IF EXISTS firms.idx_postal_codes_postal;
|
||||
|
||||
-- COPY target matching the GeoNames TSV column layout. Everything is text
-- (including lat / lng / accuracy) so malformed values do not abort the load;
-- casting happens when rows move into firms.postal_codes.
CREATE TABLE IF NOT EXISTS firms.staging_postal_codes (
    country_code    TEXT,
    postal_code     TEXT,
    place_name      TEXT,
    admin1_name     TEXT,
    admin1_code     TEXT,
    admin2_name     TEXT,
    admin2_code     TEXT,
    admin3_name     TEXT,
    admin3_code     TEXT,
    lat             TEXT,
    lng             TEXT,
    accuracy        TEXT
);
|
||||
@@ -0,0 +1,32 @@
|
||||
-- 015_firms_onrc_extras.sql
|
||||
-- Two additional ONRC bulk CSVs we weren't importing yet:
|
||||
-- 1. od_reprezentanti_if.csv — administrators of "Întreprinderi Familiale"
|
||||
-- (~80K rows). The persoană field plus locality+county of birth gives us
|
||||
-- a separate small "owner registry" parallel to rep_legali on firms.entities.
|
||||
-- 2. od_sucursale_alte_state_membre.csv — branches of RO companies registered
|
||||
-- in other EU states (~tiny, ~hundreds of rows). Useful for follow-the-money
|
||||
-- questions like "RO firm with EU branches winning EU-funded contracts".
|
||||
--
|
||||
-- Both are keyed by cod_inmatriculare which we already have on firms.entities,
|
||||
-- so JOINs are trivial. Idempotent: TRUNCATE-and-reload on each ONRC snapshot.
|
||||
|
||||
-- Representatives of family enterprises, keyed (non-uniquely) by the
-- registration number. No primary key: the table is meant to be reloaded
-- wholesale on each snapshot.
CREATE TABLE IF NOT EXISTS firms.reprezentanti_if (
    cod_inmatriculare   TEXT NOT NULL,
    nume                TEXT,
    data_nastere        TEXT,   -- raw DD.MM.YYYY string as published by ONRC
    localitate_nastere  TEXT,
    judet_nastere       TEXT,
    tara_nastere        TEXT,
    calitate            TEXT
);

-- Supports joins on the registration number.
CREATE INDEX IF NOT EXISTS idx_rep_if_cod
    ON firms.reprezentanti_if (cod_inmatriculare);
|
||||
|
||||
-- Branches registered in other EU member states, keyed (non-uniquely) by the
-- parent company's registration number.
CREATE TABLE IF NOT EXISTS firms.sucursale_ue (
    cod_inmatriculare   TEXT NOT NULL,
    tip_unitate         TEXT,   -- usually "Sucursală"
    denumire_sucursala  TEXT,
    euid                TEXT,
    cod_fiscal_strain   TEXT,   -- source field is COD_FISCAL, but it holds the foreign tax id
    tara                TEXT    -- destination country
);

-- Supports joins on the registration number.
CREATE INDEX IF NOT EXISTS idx_sucursale_ue_cod
    ON firms.sucursale_ue (cod_inmatriculare);
|
||||
@@ -0,0 +1,97 @@
|
||||
-- 016_firms_financials_categories.sql
|
||||
-- Separate tables for the non-WEB_UU/BL_BS_SL MFP financial categories.
|
||||
-- Schemas differ enough between categories (ONG=46 indicators with separate
|
||||
-- non-profit + economic activity tracking; banks=23 IFRS-specific indicators)
|
||||
-- that lumping them with firms.financials would distort the existing schema.
|
||||
--
|
||||
-- We store raw indicators in JSONB to avoid 46-column tables and to absorb
|
||||
-- future ANAF schema tweaks without migrations. A handful of canonical
|
||||
-- columns lifted out of JSONB for fast querying / recipe filtering.
|
||||
|
||||
-- ─── ONG (Asociații, Fundații) ────────────────────────────────────────────
|
||||
-- Source: WEB_ONG_AN20XX.txt on data.gov.ro situatii_financiare_<year>
|
||||
-- Schema: CUI, CAEN, CAENO, i1..i46
|
||||
-- i12 = Capitaluri proprii
|
||||
-- i37 = Venituri totale (prevederi anuale)
|
||||
-- i38 = Venituri totale (realizat la 31.12)
|
||||
-- i39 = Cheltuieli totale (prevederi anuale)
|
||||
-- i40 = Cheltuieli totale (realizat la 31.12)
|
||||
-- i41 = Excedent / Profit (prevederi anuale)
|
||||
-- i42 = Excedent / Profit (realizat la 31.12)
|
||||
-- i45 = Personal activități fără scop patrimonial
|
||||
-- i46 = Personal activități economice
|
||||
|
||||
-- NGO financial statements, one row per (cui, year). A few frequently queried
-- indicators are lifted into typed columns; the full i1..i46 set is kept in
-- the indicators JSONB document.
CREATE TABLE IF NOT EXISTS firms.financials_ong (
    cui                 TEXT        NOT NULL,
    year                INTEGER     NOT NULL,
    caen                TEXT,               -- economic activity
    caeno               TEXT,               -- non-profit activity

    -- Lifted convenience columns
    capitaluri_proprii  NUMERIC(20,2),      -- i12
    venituri_total      NUMERIC(20,2),      -- i38
    cheltuieli_total    NUMERIC(20,2),      -- i40
    excedent            NUMERIC(20,2),      -- i42
    personal_neeconomic BIGINT,             -- i45
    personal_economic   BIGINT,             -- i46

    -- Complete raw indicator set
    indicators          JSONB       NOT NULL,   -- {i1..i46}, all values

    source              TEXT        DEFAULT 'mfinante:WEB_ONG',
    fetched_at          TIMESTAMPTZ DEFAULT now(),

    PRIMARY KEY (cui, year)
);

CREATE INDEX IF NOT EXISTS idx_fin_ong_year
    ON firms.financials_ong (year);

CREATE INDEX IF NOT EXISTS idx_fin_ong_caen
    ON firms.financials_ong (caen);
|
||||
|
||||
-- COPY target for the NGO file: 49 text columns (cui, caen, caeno, i1..i46).
-- Values stay as text; parsing happens on the way into firms.financials_ong.
CREATE TABLE IF NOT EXISTS firms.staging_ong (
    cui     TEXT,
    caen    TEXT,
    caeno   TEXT,
    i1  TEXT, i2  TEXT, i3  TEXT, i4  TEXT, i5  TEXT, i6  TEXT, i7  TEXT, i8  TEXT, i9  TEXT, i10 TEXT,
    i11 TEXT, i12 TEXT, i13 TEXT, i14 TEXT, i15 TEXT, i16 TEXT, i17 TEXT, i18 TEXT, i19 TEXT, i20 TEXT,
    i21 TEXT, i22 TEXT, i23 TEXT, i24 TEXT, i25 TEXT, i26 TEXT, i27 TEXT, i28 TEXT, i29 TEXT, i30 TEXT,
    i31 TEXT, i32 TEXT, i33 TEXT, i34 TEXT, i35 TEXT, i36 TEXT, i37 TEXT, i38 TEXT, i39 TEXT, i40 TEXT,
    i41 TEXT, i42 TEXT, i43 TEXT, i44 TEXT, i45 TEXT, i46 TEXT
);
|
||||
|
||||
|
||||
-- ─── Bănci / Instituții de Credit ─────────────────────────────────────────
|
||||
-- Source: WEB_Inst_de_credit_20XX.txt on data.gov.ro situatii_financiare_<year>
|
||||
-- Schema: CUI, CAEN, i1..i23 (IFRS bank-specific)
|
||||
-- i6 = Active financiare evaluate la cost amortizat
|
||||
-- i14 = Capital social
|
||||
-- i17 = Profit / (-) pierdere aferent exercițiului
|
||||
-- i19 = Profit / pierdere din operațiuni continue înainte de impozitare
|
||||
-- i22 = Profit / pierdere aferent exercițiului
|
||||
-- i23 = Cifra de afaceri netă
|
||||
-- (Note: source CSV has a typo for i18 — column header "18" without "i"
|
||||
-- prefix. Importer treats it consistently as i18.)
|
||||
|
||||
-- Credit institution financial statements, one row per (cui, year). Selected
-- indicators are lifted into typed columns; the full i1..i23 set is kept in
-- the indicators JSONB document.
CREATE TABLE IF NOT EXISTS firms.financials_banks (
    cui                         TEXT        NOT NULL,
    year                        INTEGER     NOT NULL,
    caen                        TEXT,

    -- Lifted convenience columns
    active_financiare_amortiz   NUMERIC(20,2),  -- i6
    capital_social              NUMERIC(20,2),  -- i14
    profit_exercitiu            NUMERIC(20,2),  -- i22
    profit_inainte_impozit      NUMERIC(20,2),  -- i19
    cifra_afaceri               NUMERIC(20,2),  -- i23

    -- Complete raw indicator set
    indicators                  JSONB       NOT NULL,   -- {i1..i23}

    source                      TEXT        DEFAULT 'mfinante:WEB_Inst_de_credit',
    fetched_at                  TIMESTAMPTZ DEFAULT now(),

    PRIMARY KEY (cui, year)
);

CREATE INDEX IF NOT EXISTS idx_fin_banks_year
    ON firms.financials_banks (year);
|
||||
|
||||
-- COPY target for the credit institution file: cui, caen and i1..i23, all
-- kept as text until parsed into firms.financials_banks.
CREATE TABLE IF NOT EXISTS firms.staging_banks (
    cui     TEXT,
    caen    TEXT,
    i1  TEXT, i2  TEXT, i3  TEXT, i4  TEXT, i5  TEXT, i6  TEXT, i7  TEXT, i8  TEXT, i9  TEXT, i10 TEXT,
    i11 TEXT, i12 TEXT, i13 TEXT, i14 TEXT, i15 TEXT, i16 TEXT, i17 TEXT, i18 TEXT, i19 TEXT, i20 TEXT,
    i21 TEXT, i22 TEXT, i23 TEXT
);
|
||||
@@ -0,0 +1,58 @@
|
||||
-- 017_fonduri_afir.sql
|
||||
-- AFIR (Agenția pentru Finanțarea Investițiilor Rurale) plăți FEGA + FEADR.
|
||||
-- Source: https://www.afir.ro/rapoarte/beneficiari-de-fonduri-europene/date-deschise/
|
||||
-- Format: XLSX bulk per year, ~560K rows/year, no direct CUI column.
|
||||
-- Strategy: load all rows, then fuzzy-match name → cui in a separate batch job.
|
||||
|
||||
-- Schema for EU funds datasets.
CREATE SCHEMA IF NOT EXISTS fonduri;

-- AFIR payment rows as published, plus enrichment columns that a separate
-- matcher fills in afterwards. The source carries no tax id, hence the
-- surrogate key and the nullable cui.
CREATE TABLE IF NOT EXISTS fonduri.afir_plati (
    id                  BIGSERIAL       PRIMARY KEY,
    source_year         SMALLINT        NOT NULL,
    beneficiar_name     TEXT            NOT NULL,
    last_name           TEXT,           -- empty for legal entities
    mama_cui            TEXT,           -- parent company name + CUI when applicable (mostly empty)
    localitate          TEXT,
    cod_masura          TEXT,
    obiectiv            TEXT,
    data_start          TEXT,
    data_end            TEXT,
    fega_op             NUMERIC(20,2),
    fega_total          NUMERIC(20,2),
    feadr_op            NUMERIC(20,2),
    feadr_total         NUMERIC(20,2),
    op_amount           NUMERIC(20,2),
    cofinantare         NUMERIC(20,2),
    ue_total            NUMERIC(20,2),

    -- Enrichment, written by the separate matcher
    cui                 TEXT,
    cui_match_score     REAL,
    cui_match_method    TEXT,
    matched_at          TIMESTAMPTZ,

    fetched_at          TIMESTAMPTZ     DEFAULT now()
);
|
||||
|
||||
-- Filter indexes on fonduri.afir_plati.
CREATE INDEX IF NOT EXISTS idx_afir_year
    ON fonduri.afir_plati (source_year);

-- Partial: only rows the matcher has already resolved to a CUI.
CREATE INDEX IF NOT EXISTS idx_afir_cui
    ON fonduri.afir_plati (cui)
    WHERE cui IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_afir_cod_masura
    ON fonduri.afir_plati (cod_masura);

CREATE INDEX IF NOT EXISTS idx_afir_localitate
    ON fonduri.afir_plati (localitate);

-- Trigram index backing fuzzy name matching; requires the pg_trgm extension.
CREATE INDEX IF NOT EXISTS idx_afir_name_trgm
    ON fonduri.afir_plati
    USING gin (beneficiar_name gin_trgm_ops);
|
||||
|
||||
-- COPY target for AFIR rows: no key, every column text (amounts included), so
-- the bulk load never fails on formatting.
CREATE TABLE IF NOT EXISTS fonduri.staging_afir (
    beneficiar_name TEXT,
    last_name       TEXT,
    mama_cui        TEXT,
    localitate      TEXT,
    cod_masura      TEXT,
    obiectiv        TEXT,
    data_start      TEXT,
    data_end        TEXT,
    fega_op         TEXT,
    fega_total      TEXT,
    feadr_op        TEXT,
    feadr_total     TEXT,
    op_amount       TEXT,
    cofinantare     TEXT,
    ue_total        TEXT
);
|
||||
@@ -0,0 +1,102 @@
|
||||
-- 018_fonduri_beneficiar_privat.sql
|
||||
-- Achiziții făcute de beneficiarii PRIVAȚI ai fondurilor europene
|
||||
-- (firme care au primit POIM/POR/PNRR/AFIR etc. și trebuie să facă achiziții
|
||||
-- transparente conform Manualului Beneficiarului — dar NU intră în SEAP fiindcă
|
||||
-- sunt privați, nu autorități publice).
|
||||
--
|
||||
-- Source: https://beneficiar.fonduri-ue.ro:8080/anunturi
|
||||
-- Volume: ~48,650 anunțuri (and growing) cu loturi atașate.
|
||||
-- Joinable to firms.entities prin name (no CUI in source — fuzzy match).
|
||||
-- Joinable to fonduri.afir_plati prin name (overlap pe beneficiari AFIR).
|
||||
-- Joinable to seap.announcements prin supplier_cui când suppliers selected pe
|
||||
-- aceste proceduri devin furnizori la stat (cross-source signal).
|
||||
|
||||
-- Procurement announcements published by private beneficiaries of EU funds.
-- The primary key is the portal's own numeric id, so re-scraping the same
-- announcement targets the same row.
CREATE TABLE IF NOT EXISTS fonduri.beneficiar_anunt (
    id                      INTEGER     PRIMARY KEY,    -- numeric ID from the URL /anunturi/details/2/{ID}

    -- Project linkage
    smis_proiect_id         INTEGER,    -- links to /proiecte/details/{type}/{id}
    smis_proiect_type       SMALLINT,   -- 1=SMIS, 2=MySMIS, 3=PNRR, etc. (URL prefix)
    smis_proiect_code       TEXT,       -- "319946" (for display)
    smis_proiect_name       TEXT,       -- "CONSTRUIRE CAMIN BATRANI..."

    -- Beneficiary (private)
    beneficiar_name         TEXT        NOT NULL,
    beneficiar_program_tag  TEXT,       -- "SMIS" | "MySMIS" | other
    beneficiar_adresa       TEXT,
    beneficiar_contact      TEXT,
    beneficiar_telefon      TEXT,
    beneficiar_regiune      TEXT,
    beneficiar_judet        TEXT,
    beneficiar_localitate   TEXT,

    -- Announcement status / details
    procedura_status        TEXT,       -- "în curs de ofertare" | "închisă"
    data_publicare          DATE,
    data_limita_oferta      DATE,
    ora_limita_oferta       TEXT,
    judet                   TEXT,       -- county of the announcement (may differ from beneficiar_judet)
    tip_contract            TEXT,       -- "Furnizare" | "Servicii" | "Lucrări"
    versiune_specificatii   TEXT,
    titlu                   TEXT,       -- announcement title (from the list page)

    -- CUI enrichment (fuzzy match against firms.entities, second pass)
    cui                     TEXT,
    cui_match_score         REAL,
    cui_match_method        TEXT,       -- 'exact_name' | 'trgm' | 'manual'
    matched_at              TIMESTAMPTZ,

    -- Source tracking
    fetched_at              TIMESTAMPTZ DEFAULT now(),
    raw_html_sha256         CHAR(64)    -- page hash, used to detect when a re-fetch is needed
);
|
||||
|
||||
-- Filter / sort indexes on fonduri.beneficiar_anunt.
CREATE INDEX IF NOT EXISTS idx_ben_anunt_smis
    ON fonduri.beneficiar_anunt (smis_proiect_id);

CREATE INDEX IF NOT EXISTS idx_ben_anunt_judet
    ON fonduri.beneficiar_anunt (judet);

-- Newest-first listing.
CREATE INDEX IF NOT EXISTS idx_ben_anunt_data
    ON fonduri.beneficiar_anunt (data_publicare DESC);

-- Partial: only rows already resolved to a CUI.
CREATE INDEX IF NOT EXISTS idx_ben_anunt_cui
    ON fonduri.beneficiar_anunt (cui)
    WHERE cui IS NOT NULL;

-- Trigram index backing fuzzy name matching; requires the pg_trgm extension.
CREATE INDEX IF NOT EXISTS idx_ben_anunt_name_trgm
    ON fonduri.beneficiar_anunt
    USING gin (beneficiar_name gin_trgm_ops);
|
||||
|
||||
|
||||
-- Lots attached to an announcement. Rows are removed together with their
-- parent announcement (ON DELETE CASCADE), and (anunt_id, lot_no) is unique so
-- a re-scrape can upsert by ordinal.
CREATE TABLE IF NOT EXISTS fonduri.beneficiar_anunt_lot (
    id              BIGSERIAL   PRIMARY KEY,
    anunt_id        INTEGER     NOT NULL
                    REFERENCES fonduri.beneficiar_anunt (id) ON DELETE CASCADE,
    lot_no          INTEGER     NOT NULL,   -- 1, 2, 3... (ordinal)
    lot_label       TEXT,                   -- "1" or "Achiziție licențe" — heading text
    descriere_url   TEXT,                   -- /desc-lot?d={lot_id}
    durata_contract TEXT,                   -- e.g. "6 luni"
    buget_lei       NUMERIC(20,2),
    cpv_cod         TEXT,                   -- when present
    spec_url        TEXT,
    fetched_at      TIMESTAMPTZ DEFAULT now(),
    UNIQUE (anunt_id, lot_no)
);

-- Databases created from an earlier revision of this table lack lot_label;
-- add it there. No-op on a fresh install.
ALTER TABLE fonduri.beneficiar_anunt_lot
    ADD COLUMN IF NOT EXISTS lot_label TEXT;
|
||||
|
||||
-- No standalone index on anunt_id: the UNIQUE (anunt_id, lot_no) constraint is
-- backed by a b-tree that leads with anunt_id, which already serves joins to
-- the parent announcement and the ON DELETE CASCADE lookups. Remove the
-- duplicate where an earlier run created it.
DROP INDEX IF EXISTS fonduri.idx_ben_lot_anunt;

-- Largest-budget-first listing; lots without a budget sort last.
CREATE INDEX IF NOT EXISTS idx_ben_lot_buget ON fonduri.beneficiar_anunt_lot(buget_lei DESC NULLS LAST);
|
||||
|
||||
|
||||
-- Placeholder table for the EU project itself, linked from announcements via
-- smis_proiect_id. Intended to be filled later by a separate /proiecte scraper.
CREATE TABLE IF NOT EXISTS fonduri.beneficiar_proiect (
    id                      INTEGER     PRIMARY KEY,    -- /proiecte/details/{type}/{id} → id
    proiect_type            SMALLINT,                   -- 1=SMIS, 2=MySMIS, etc.
    smis_code               TEXT,                       -- "319946"
    titlu                   TEXT,
    beneficiar_name         TEXT,
    program_op              TEXT,                       -- POIM/POR/POCU/PNRR/...
    axa_prioritara          TEXT,
    valoare_totala_lei      NUMERIC(20,2),
    valoare_eligibila_lei   NUMERIC(20,2),
    contributie_ue_lei      NUMERIC(20,2),
    data_start              DATE,
    data_end                DATE,
    data_actualizare        DATE,
    judet                   TEXT,
    localitate              TEXT,
    cui                     TEXT,
    cui_match_score         REAL,
    fetched_at              TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_ben_proiect_smis
    ON fonduri.beneficiar_proiect (smis_code);

CREATE INDEX IF NOT EXISTS idx_ben_proiect_program
    ON fonduri.beneficiar_proiect (program_op);

-- Partial: only rows already resolved to a CUI.
CREATE INDEX IF NOT EXISTS idx_ben_proiect_cui
    ON fonduri.beneficiar_proiect (cui)
    WHERE cui IS NOT NULL;
|
||||
@@ -0,0 +1,66 @@
|
||||
-- 019_cui_matcher.sql
|
||||
-- Fuzzy CUI matcher for any external table that has a company name but no CUI
|
||||
-- (fonduri.beneficiar_anunt, fonduri.afir_plati, future ANI shareholdings, etc.)
|
||||
-- against firms.entities.
|
||||
--
|
||||
-- Strategy:
|
||||
-- 1. Build a normalized form of each company name on both sides
|
||||
-- (lowercase + unaccent + strip legal suffixes + collapse whitespace).
|
||||
-- 2. Stage A (exact normalized match): expect ~40-50% hit rate when the
|
||||
-- ONRC-canonical legal name was used in the source.
|
||||
-- 3. Stage B (pg_trgm fuzzy): top candidate ≥ 0.85 AND uniquely best
|
||||
-- (gap to second-best ≥ 0.10) → auto-accept.
|
||||
-- 4. Stage C (judet disambiguation): when multiple candidates above
|
||||
-- threshold, prefer firm whose adr_judet matches source's judet.
|
||||
|
||||
-- Idempotent. Re-runnable after each scrape.
|
||||
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||
CREATE EXTENSION IF NOT EXISTS unaccent;
|
||||
|
||||
-- ── Normalization helper ────────────────────────────────────────────────
-- firms.normalize_company_name(input) → canonical comparison form of a
-- company name: lowercase, accents removed, leading "SC" prefix and trailing
-- legal-form suffix stripped, punctuation and whitespace collapsed.
-- Returns NULL for NULL input.
--
-- Why a wrapper: unaccent() is declared STABLE, so it cannot be used directly
-- in an index or a generated column. Wrapping it in an IMMUTABLE function is
-- the usual workaround, but the wrapper must call the TWO-argument form with a
-- schema-qualified dictionary. The one-argument form looks the "unaccent"
-- dictionary up through the caller's search_path, so it fails (or silently
-- uses another dictionary) whenever search_path does not include the
-- extension's schema — e.g. while pg_dump/pg_restore rebuilds the generated
-- column and its indexes with an empty search_path.
--
-- The output is identical to the one-argument call under the default
-- search_path, so values already stored in generated columns stay valid.
CREATE OR REPLACE FUNCTION firms.normalize_company_name(input text)
RETURNS text
LANGUAGE plpgsql
IMMUTABLE
PARALLEL SAFE
AS $$
DECLARE
    s text;
BEGIN
    IF input IS NULL THEN
        RETURN NULL;
    END IF;

    -- Pin the dictionary explicitly so the result never depends on search_path.
    s := lower(public.unaccent('public.unaccent'::regdictionary, input));

    -- Strip a leading legal-form prefix ("SC", "S.C.") followed by whitespace.
    s := regexp_replace(s, '^(s\.?c\.?|sc)\s+', '', 'i');

    -- Strip a trailing legal-form suffix (SRL, SRL-D, SA, SNC, SCS, SCA, PFA,
    -- II, IF), with or without dots, preceded by whitespace.
    s := regexp_replace(s,
        '\s+(s\.?r\.?l\.?(\s*-?\s*d)?|s\.?a\.?|s\.?n\.?c\.?|s\.?c\.?s\.?|s\.?c\.?a\.?|p\.?f\.?a\.?|i\.?i\.?|i\.?f\.?)\s*\.?\s*$',
        '', 'i');

    -- Turn runs of punctuation into a single space, then collapse whitespace.
    s := regexp_replace(s, '[\.,\-\(\)/\\]+', ' ', 'g');
    s := regexp_replace(s, '\s+', ' ', 'g');

    RETURN trim(s);
END;
$$;
|
||||
|
||||
-- Stored generated column holding the normalized company name. PostgreSQL
-- computes it from "name" on every insert/update, so no backfill UPDATE is
-- needed; adding it rewrites the table once.
ALTER TABLE firms.entities
    ADD COLUMN IF NOT EXISTS name_normalized text
        GENERATED ALWAYS AS (firms.normalize_company_name(name)) STORED;

-- B-tree for exact normalized-name matches.
CREATE INDEX IF NOT EXISTS idx_entities_name_normalized
    ON firms.entities (name_normalized);

-- Trigram GIN for fuzzy matches; requires the pg_trgm extension.
CREATE INDEX IF NOT EXISTS idx_entities_name_norm_trgm
    ON firms.entities
    USING gin (name_normalized gin_trgm_ops);
|
||||
|
||||
-- firms.normalize_judet(input) → lowercase, accent-free county name used to
-- disambiguate candidate matches. NULL input yields the empty string.
--
-- The function is declared IMMUTABLE, so it must not depend on session state:
-- it calls the two-argument unaccent() with a schema-qualified dictionary.
-- The one-argument form resolves the dictionary through search_path and
-- breaks when the extension's schema is not on it.
CREATE OR REPLACE FUNCTION firms.normalize_judet(input text)
RETURNS text
LANGUAGE sql
IMMUTABLE
PARALLEL SAFE
AS $$
    SELECT lower(public.unaccent('public.unaccent'::regdictionary, coalesce(input, '')))
$$;
|
||||
@@ -0,0 +1,43 @@
|
||||
-- 020_fonduri_proiect_v2.sql
|
||||
-- Refactor fonduri.beneficiar_proiect to match what the source actually exposes.
|
||||
-- Source: https://beneficiar.fonduri-ue.ro:8080/proiecte/details/1/{id}
|
||||
--
|
||||
-- The page exposes only 7 fields (Cod SMIS, Program operațional, Axa, Domeniul
|
||||
-- de intervenție, Operațiune, Beneficiar, Data contract) — NOT valoare_totala/
|
||||
-- valoare_eligibila/contributie_ue/data_start/data_end/judet/localitate that
|
||||
-- the original aspirational schema (018) implied. Drop unused fields, add the
|
||||
-- ones we can populate, split each "Program/Axa/Domeniul/Operațiune" into a
|
||||
-- {cod, text} pair (first whitespace-separated token = code, rest = text).
|
||||
--
|
||||
-- The placeholder table from 018 has 0 rows → safe to drop + recreate.
|
||||
|
||||
-- Replace the placeholder layout with the v2 layout, without ever destroying
-- v2 data.
--
-- The previous unconditional DROP TABLE + plain CREATE made this script
-- destructive on a second run: every scraped project row was wiped. The table
-- is now dropped only while it still has the legacy layout, detected by its
-- program_op column (v2 replaces it with program_op_cod / program_op_text).
-- All CREATE statements are IF NOT EXISTS, so re-running is a no-op.
DO $$
BEGIN
    IF EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = 'fonduri'
          AND table_name   = 'beneficiar_proiect'
          AND column_name  = 'program_op'
    ) THEN
        -- Also removes the legacy indexes that belong to the table.
        DROP TABLE fonduri.beneficiar_proiect;
    END IF;
END
$$;

CREATE TABLE IF NOT EXISTS fonduri.beneficiar_proiect (
    id                  integer     PRIMARY KEY,    -- /proiecte/details/{type}/{id} → id
    proiect_type        smallint    NOT NULL,       -- 1=SMIS (only type seen so far)
    smis_code           text,                       -- "Cod SMIS" e.g. "313646"
    titlu               text,                       -- from <title> tag
    beneficiar_name     text,                       -- "REALMET SRL"
    -- Each source field is split into {cod, text}: first whitespace-separated
    -- token is the code, the remainder is the text.
    program_op_cod      text,                       -- "PRNE" / "POIM" / "POR" / "PNRR" …
    program_op_text     text,                       -- "Program Regional Nord-Est"
    axa_cod             text,                       -- "PRNE_P1"
    axa_text            text,                       -- "P1.P1. Nord-Est – O regiune mai competitivă…"
    domeniul_cod        text,                       -- "RSO1.3"
    domeniul_text       text,                       -- "RSO1.3_Intensificarea creșterii…"
    operatiune_cod      text,                       -- "PRNE_A18"
    operatiune_text     text,                       -- "Investiții pentru modernizarea…"
    data_contract       date,
    cui                 text,                       -- fuzzy-matched later
    cui_match_score     real,
    cui_match_method    text,
    matched_at          timestamptz,
    fetched_at          timestamptz DEFAULT now(),
    raw_html_sha256     char(64)
);

CREATE INDEX IF NOT EXISTS idx_ben_proiect_smis ON fonduri.beneficiar_proiect(smis_code);
CREATE INDEX IF NOT EXISTS idx_ben_proiect_program ON fonduri.beneficiar_proiect(program_op_cod);
CREATE INDEX IF NOT EXISTS idx_ben_proiect_axa ON fonduri.beneficiar_proiect(axa_cod);
CREATE INDEX IF NOT EXISTS idx_ben_proiect_cui ON fonduri.beneficiar_proiect(cui) WHERE cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_ben_proiect_data ON fonduri.beneficiar_proiect(data_contract DESC NULLS LAST);
|
||||
@@ -0,0 +1,74 @@
|
||||
-- 021_regas_ajutoare.sql
|
||||
-- RegAS — Registrul Ajutoarelor de Stat (Consiliul Concurenței).
|
||||
-- Source: https://regas.consiliulconcurentei.ro/transparenta/index.html
|
||||
-- API: POST /apitransparenta/cautareTransparenta (XSRF-TOKEN cookie + header)
|
||||
-- Volume at first scrape (2026-05-09): 132,363 ajutoare individuale.
|
||||
--
|
||||
-- Each row = one ajutor de stat acordat unei firme (denumireBeneficiar+cui),
|
||||
-- cu masura (referintaMasura SA.xxx/yyyy), categorie/subcategorie, suma în RON,
|
||||
-- finantator (ministerul/agentia care a dat banii), instrumentAcordare
|
||||
-- (fonduri nerambursabile / credite / garantii / scutiri).
|
||||
--
|
||||
-- Cheie naturală: (cui, idMasura, dataAcordare, subcategorie, ajutorAcordatSubcategorie) —
|
||||
-- același beneficiar poate primi multiple tranșe pe aceeași măsură. Folosim
|
||||
-- un id sintetic SHA1 pentru ON CONFLICT idempotent.
|
||||
|
||||
-- Schema for the state aid register.
CREATE SCHEMA IF NOT EXISTS regas;

-- One row per state aid award. The primary key is a synthetic SHA-1 hex digest
-- so repeated scrapes can upsert with ON CONFLICT.
CREATE TABLE IF NOT EXISTS regas.ajutoare (
    id                          CHAR(40)    PRIMARY KEY,    -- sha1(cui|idMasura|dataAcordare|subcategorie|sumaSubcategorie)
    cui                         TEXT        NOT NULL,
    denumire_beneficiar         TEXT        NOT NULL,
    raf                         TEXT,       -- "Registrul Ajutoarelor Fiscale" id (rare)
    alt_cod                     TEXT,       -- alternative ID (rare)
    dimensiune_intreprindere    TEXT,       -- "întreprindere mare" / "IMM" / etc.
    regiune                     TEXT[],     -- ["Regiunea I Nord-Est", ...] or ["Toate regiunile"]
    domeniu_activitate          TEXT,       -- CAEN code (4 digits)
    data_acordare               DATE,       -- parsed from "DD/MM/YYYY"
    id_masura                   INTEGER     NOT NULL,       -- logical key for /apitransparenta/downloadPdfMasura/{idMasura}
    denumire_masura             TEXT        NOT NULL,
    referinta_masura            TEXT,       -- "SA.104966/2022"
    activitati_finantate        TEXT[],     -- CAEN codes
    categorie                   TEXT,
    subcategorie                TEXT,
    obiectiv                    TEXT,
    ajutor_acordat_categorie    NUMERIC(20,2),  -- RON
    ajutor_acordat_subcategorie NUMERIC(20,2),  -- RON (usually equals the category amount when there is a single subcategory)
    intermediari_financiari     TEXT[],
    executanti                  TEXT[],
    instrument_acordare         TEXT,       -- "fonduri nerambursabile" / "garantii" / "credite"
    intensitate                 NUMERIC(6,2),   -- percent (e.g. 65.00)
    finantator                  TEXT,       -- "Ministerul ... (MIPE)"
    pdf_masura                  TEXT,       -- "1195_MASURA.pdf"
    fetched_at                  TIMESTAMPTZ DEFAULT now(),
    raw_json                    JSONB
);
|
||||
|
||||
-- Lookup / sort indexes on regas.ajutoare.
CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_cui
    ON regas.ajutoare (cui);

-- Newest-first listing; undated rows sort last.
CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_data
    ON regas.ajutoare (data_acordare DESC NULLS LAST);

CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_finantator
    ON regas.ajutoare (finantator);

CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_referinta
    ON regas.ajutoare (referinta_masura);

CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_id_masura
    ON regas.ajutoare (id_masura);

-- Catalog documentation, visible through \d+ and pg_description.
COMMENT ON TABLE regas.ajutoare
    IS 'State aid records from Consiliul Concurentei RegAS portal. One row per ajutor acordat. Source: https://regas.consiliulconcurentei.ro/transparenta/';

COMMENT ON COLUMN regas.ajutoare.id
    IS 'Synthetic sha1 of (cui|idMasura|dataAcordare|subcategorie|ajutorAcordatSubcategorie). Used for idempotent upsert.';
|
||||
|
||||
-- Per-company roll-up of state aid, precomputed for company profile pages:
-- award count, total amount, distinct measures and funders, first and last
-- award date, and the distinct funder / instrument lists (NULLs excluded
-- from the arrays).
create materialized view if not exists regas.mv_ajutoare_per_cui as
select
    a.cui,
    count(*)                                as nr_ajutoare,
    sum(a.ajutor_acordat_subcategorie)      as total_ron,
    count(distinct a.id_masura)             as nr_masuri,
    count(distinct a.finantator)            as nr_finantatori,
    min(a.data_acordare)                    as prima_acordare,
    max(a.data_acordare)                    as ultima_acordare,
    array_agg(distinct a.finantator)
        filter (where a.finantator is not null)             as finantatori,
    array_agg(distinct a.instrument_acordare)
        filter (where a.instrument_acordare is not null)    as instrumente
from regas.ajutoare as a
group by a.cui;

-- One row per cui; the unique index is also a prerequisite for
-- REFRESH MATERIALIZED VIEW CONCURRENTLY.
create unique index if not exists idx_regas_mv_cui
    on regas.mv_ajutoare_per_cui (cui);
|
||||
@@ -0,0 +1,203 @@
|
||||
-- 024_aep_donatii.sql
|
||||
-- AEP — Autoritatea Electorală Permanentă — donații electorale & finanțare partide.
|
||||
--
|
||||
-- Sursă oficială (mandatată prin Legea 334/2006): rapoartele anuale + rapoartele
|
||||
-- de venituri și cheltuieli (RVC) ale partidelor + listele de donatori publicate
|
||||
-- în Monitorul Oficial pentru donații > 10 salarii minime brute.
|
||||
--
|
||||
-- Vehicul de ingest: portalul Expert Forum (banipartide.ro) care a agregat-o
|
||||
-- deja în SQLite și o expune via endpoint base64-SQL la
|
||||
-- https://www.banipartide.ro/app/json.php?mode=dt&ssid=<base64>.
|
||||
-- (Sursele primare AEP sunt PDF/Excel + reCAPTCHA, deci EFOR este path de
|
||||
-- minim efort. Validate against AEP RVC PDFs as v2.)
|
||||
--
|
||||
-- Volume @ 2026-05-09:
|
||||
-- Donatori persoane juridice (>10 sal MO): 3,612 (2006-2024)
|
||||
-- Donatori persoane fizice (>10 sal MO): 30,792 (2006-2024)
|
||||
-- Donatori RVC (rapoarte venituri/cheltuieli, granular complet): 353,473
|
||||
--
|
||||
-- GDPR: CNP-urile sunt expuse în clear pe banipartide.ro (publicate în MO conf.
|
||||
-- legii). Le hash-ăm SHA256 pe ingest — la noi NU stocăm CNP raw. Numele
|
||||
-- complet e public prin lege și rămâne. Adresa pe firme (PJ) e public,
|
||||
-- la persoane fizice (PF) NU avem adresă la sursă.
|
||||
--
|
||||
-- Cross-source value: aep.donatii_pj.donator_cui ⨯ seap.announcements.supplier_cui
|
||||
-- = "donator X a donat Y RON partidului Z, apoi a câștigat W RON contracte SEAP".
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS aep;
|
||||
|
||||
COMMENT ON SCHEMA aep IS
|
||||
'Autoritatea Electorală Permanentă — donații, finanțare partide, RVC. Sursă: banipartide.ro (EFOR) → AEP/MO.';
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- aep.partide — registru partide normalizat (codes from banipartide source)
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- Normalized registry of political parties; aep.donatii_pj / donatii_pf /
-- donatii_rvc reference it through their partid_id foreign keys.
CREATE TABLE IF NOT EXISTS aep.partide (
|
||||
id text PRIMARY KEY, -- party abbreviation used as natural key: 'PSD', 'PNL', 'USR', 'AUR', 'UDMR', etc.
|
||||
nume_oficial text, -- official name, e.g. 'Partidul Social Democrat'
|
||||
fondat date, -- presumably the founding date — TODO confirm against the ingest code
|
||||
sediu_cui text, -- the party's fiscal code (CIF), when known
|
||||
status text, -- 'activ' | 'dizolvat' | 'fuzionat'
|
||||
fetched_at timestamptz DEFAULT now()
|
||||
);
|
||||
|
||||
COMMENT ON TABLE aep.partide IS
|
||||
'Registru partide politice (cheie naturală = abreviere normalizată din sursa banipartide).';
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- aep.donatii_pj — donații de la persoane juridice (>10 salarii minime, MO)
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- Donations from legal entities to parties; one row per source record,
-- de-duplicated on source_hash.
CREATE TABLE IF NOT EXISTS aep.donatii_pj (
|
||||
id bigserial PRIMARY KEY,
|
||||
source_hash char(40) NOT NULL UNIQUE, -- sha1 hex digest used for idempotent upsert. NOTE(review): the recipe was given here as sha1(nume|cui|partid|an|suma|data_donatie) but the COMMENT ON COLUMN in this file says sha1(nume_lower|cui|partid|an|suma|data_text) — confirm against the scraper
|
||||
donator_nume text NOT NULL,
|
||||
donator_cui text, -- normalized CUI (numerals only, RO prefix stripped)
|
||||
donator_cui_raw text, -- original form as found in the source (may contain typos / "RO")
|
||||
reprezentant text,
|
||||
sediu text,
|
||||
nationalitate text, -- "română" / etc.
|
||||
partid_id text REFERENCES aep.partide(id) ON UPDATE CASCADE,
|
||||
filiala_partid text,
|
||||
suma_lei numeric(14,2) NOT NULL,
|
||||
an smallint NOT NULL,
|
||||
data_donatie_text text, -- mixed formats in the source: "11.10.2019; 13.11.2019" or "10042010" — kept raw
|
||||
data_donatie date, -- best-effort parse (NULL when the format is incompatible or holds multiple dates)
|
||||
tip_donatie text, -- "Bani" / "Natură" / etc.
|
||||
felul_donatie text, -- "Bani" / "Ordin De Plată" / "Spațiu Publicitar"
|
||||
source_url text NOT NULL DEFAULT 'https://www.banipartide.ro/donatori-persoane-juridice.html',
|
||||
fetched_at timestamptz DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pj_cui ON aep.donatii_pj(donator_cui) WHERE donator_cui IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pj_partid ON aep.donatii_pj(partid_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pj_an ON aep.donatii_pj(an);
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pj_suma ON aep.donatii_pj(suma_lei DESC);
|
||||
|
||||
COMMENT ON TABLE aep.donatii_pj IS
|
||||
'Donații de la persoane juridice către partide, peste pragul de 10 salarii minime brute (publicate în MO). Sursă: banipartide.ro → AEP. Granularitate: o linie per (donator, partid, an, sumă, dată).';
|
||||
COMMENT ON COLUMN aep.donatii_pj.source_hash IS
|
||||
'sha1(nume_lower|cui|partid|an|suma|data_text). Garantează idempotenta scraperului.';
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- aep.donatii_pf — donații de la persoane fizice (>10 sal min, MO)
|
||||
-- CNP hash-uit (NICIODATĂ raw în DB).
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS aep.donatii_pf (
|
||||
id bigserial PRIMARY KEY,
|
||||
source_hash char(40) NOT NULL UNIQUE, -- sha1(nume|cnp_hash|partid|an|suma|data)
|
||||
donator_nume text NOT NULL,
|
||||
donator_cnp_sha256 char(64), -- SHA-256 hex of CNP (only if CNP was non-empty in source)
|
||||
partid_id text REFERENCES aep.partide(id) ON UPDATE CASCADE,
|
||||
organizatia text, -- filiala / organizatia partidului
|
||||
suma_lei numeric(14,2) NOT NULL,
|
||||
an smallint NOT NULL,
|
||||
data_donatie_text text,
|
||||
data_donatie date,
|
||||
tip_donatie text, -- "Donație" / "Cotizație" / "Împrumut"
|
||||
ce_s_a_donat text, -- "Bani" / "Bunuri" / etc.
|
||||
source_url text NOT NULL DEFAULT 'https://www.banipartide.ro/donatori-persoane-fizice.html',
|
||||
fetched_at timestamptz DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pf_cnp_hash ON aep.donatii_pf(donator_cnp_sha256) WHERE donator_cnp_sha256 IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pf_nume ON aep.donatii_pf(donator_nume);
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pf_partid ON aep.donatii_pf(partid_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pf_an ON aep.donatii_pf(an);
|
||||
|
||||
COMMENT ON TABLE aep.donatii_pf IS
|
||||
'Donații de la persoane fizice către partide, peste pragul de 10 salarii minime (publicate în MO). CNP-urile sunt SHA-256 hashed la ingest. Sursă: banipartide.ro.';
|
||||
COMMENT ON COLUMN aep.donatii_pf.donator_cnp_sha256 IS
|
||||
'SHA-256 hex digest al CNP. Permite re-identificare dacă cineva are CNP-ul, dar nu dezvăluie CNP-ul. NU e key-uit cu salt — scopul e doar de-duplicare cross-an, nu protecție criptografică împotriva brute-force pe spațiul CNP-urilor românești.';
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- aep.donatii_rvc — toți donatorii din rapoartele de venituri/cheltuieli
|
||||
-- (donații + cotizații + împrumuturi, fără pragul de 10 salarii)
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS aep.donatii_rvc (
|
||||
id bigserial PRIMARY KEY,
|
||||
source_hash char(40) NOT NULL UNIQUE,
|
||||
donator_nume text NOT NULL,
|
||||
judet text, -- "Alba", "București"
|
||||
cod_judet text, -- "AB", "B"
|
||||
tip_venit text, -- "Cotizație" | "Donație" | "Împrumut"
|
||||
partid_id text REFERENCES aep.partide(id) ON UPDATE CASCADE,
|
||||
suma_lei numeric(14,2) NOT NULL,
|
||||
mod_incasare text, -- "Banca" | "Numerar" | etc.
|
||||
an smallint NOT NULL,
|
||||
data_donatie_text text,
|
||||
data_donatie date,
|
||||
source_url text NOT NULL DEFAULT 'https://www.banipartide.ro/donatori-rvc.html',
|
||||
fetched_at timestamptz DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_rvc_partid ON aep.donatii_rvc(partid_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_rvc_an ON aep.donatii_rvc(an);
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_rvc_nume ON aep.donatii_rvc(donator_nume);
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_donatii_rvc_judet ON aep.donatii_rvc(judet);
|
||||
|
||||
COMMENT ON TABLE aep.donatii_rvc IS
|
||||
'Toate donațiile/cotizațiile/împrumuturile din rapoartele de venituri și cheltuieli (RVC) ale partidelor, fără pragul de 10 salarii. ~353K rânduri. Sursă: banipartide.ro → AEP.';
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- aep.scrape_log — audit trail al scraperelor (per tabel × per zi)
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS aep.scrape_log (
|
||||
id bigserial PRIMARY KEY,
|
||||
scraper text NOT NULL, -- 'donatii_pj' | 'donatii_pf' | 'donatii_rvc'
|
||||
source_url text NOT NULL,
|
||||
rows_seen integer NOT NULL,
|
||||
rows_inserted integer NOT NULL,
|
||||
rows_updated integer NOT NULL,
|
||||
rows_skipped integer NOT NULL,
|
||||
duration_ms integer NOT NULL,
|
||||
started_at timestamptz NOT NULL,
|
||||
finished_at timestamptz NOT NULL DEFAULT now(),
|
||||
error text
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_scrape_log_scraper ON aep.scrape_log(scraper, started_at DESC);
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- Materialized view: agregare per CUI pentru profile firmă rapid.
|
||||
-- (refresh-ed by cron post-scrape; see refresh-mvs.sh)
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- Per-CUI rollup of aep.donatii_pj (rows without a donator_cui are excluded):
-- donation count, summed suma_lei, distinct parties, first/last donation year.
CREATE MATERIALIZED VIEW IF NOT EXISTS aep.mv_donatii_per_cui AS
SELECT
    pj.donator_cui                  AS cui,
    COUNT(*)                        AS nr_donatii,
    SUM(pj.suma_lei)                AS total_lei,
    COUNT(DISTINCT pj.partid_id)    AS nr_partide,
    array_agg(DISTINCT pj.partid_id)
        FILTER (WHERE pj.partid_id IS NOT NULL) AS partide,
    MIN(pj.an)                      AS prima_donatie_an,
    MAX(pj.an)                      AS ultima_donatie_an
FROM aep.donatii_pj AS pj
WHERE pj.donator_cui IS NOT NULL
GROUP BY pj.donator_cui;
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_aep_mv_donatii_per_cui ON aep.mv_donatii_per_cui(cui);
|
||||
|
||||
COMMENT ON MATERIALIZED VIEW aep.mv_donatii_per_cui IS
|
||||
'Pre-aggregat pentru profile firmă: donații totale per CUI. Refresh după fiecare scrape.';
|
||||
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- Materialized view: top donatori per partid (folosit pe pagini publice)
|
||||
-- ──────────────────────────────────────────────────────────────────────────
|
||||
-- One row per (party, donor name, donor CUI) from aep.donatii_pj, skipping rows
-- with no party: donation count, summed suma_lei, first/last donation year.
CREATE MATERIALIZED VIEW IF NOT EXISTS aep.mv_top_donatori_partid AS
SELECT
    pj.partid_id,
    pj.donator_nume,
    pj.donator_cui,
    COUNT(*)            AS nr_donatii,
    SUM(pj.suma_lei)    AS total_lei,
    MIN(pj.an)          AS prima_donatie_an,
    MAX(pj.an)          AS ultima_donatie_an
FROM aep.donatii_pj AS pj
WHERE pj.partid_id IS NOT NULL
GROUP BY
    pj.partid_id,
    pj.donator_nume,
    pj.donator_cui;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_mv_top_donatori_partid_partid ON aep.mv_top_donatori_partid(partid_id, total_lei DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_aep_mv_top_donatori_partid_cui ON aep.mv_top_donatori_partid(donator_cui) WHERE donator_cui IS NOT NULL;
|
||||
|
||||
COMMENT ON MATERIALIZED VIEW aep.mv_top_donatori_partid IS
|
||||
'Top donatori per partid pentru afișare publică. Datele sunt deja publice prin lege (MO).';
|
||||
@@ -0,0 +1,96 @@
|
||||
-- 025_anaf_datornici.sql
|
||||
-- ANAF — Lista contribuabililor cu obligații fiscale restante (datornici).
|
||||
-- Source: https://www.anaf.ro/restante/ (publicare trimestrială, Ord. 558/2016).
|
||||
-- Plus lista albă (contribuabili FĂRĂ datorii) la /restante/listaalba.xhtml.
|
||||
--
|
||||
-- Bazele legale: ANAF publică trimestrial sumele restante peste plafoane —
|
||||
-- 500.000 lei (mari contribuabili), 250.000 lei (mijlocii), 100.000 lei
|
||||
-- (mici), 10.000 lei (instituții publice). Sub plafon nu se publică.
|
||||
--
|
||||
-- KILLER USE CASE: cross-reference cu seap.announcements pentru a găsi
|
||||
-- "firme datornice care au câștigat contracte publice" — interzis prin
|
||||
-- art. 165 Legea 98/2016 dacă sunt obligații fiscale executorii.
|
||||
--
|
||||
-- IMPORTANT — limitări surse de date (2026-05-09):
|
||||
-- 1. anaf.ro/restante/index.xhtml e o aplicație JSF/PrimeFaces cu CAPTCHA
|
||||
-- de tip kaptcha pe submit. Nu e bulk-scrapeable fără OCR/captcha-solver
|
||||
-- pentru cele ~5K-15K rânduri per trimestru (×4 trim × ~10 ani = ~500K).
|
||||
-- 2. data.gov.ro publică UN SINGUR snapshot Q1-2016 (mari/mijlocii/micijuridice
|
||||
-- CSV) — 140,780 rânduri, util ca baseline istoric.
|
||||
-- 3. listafirme.eu agregă ANAF datornici în spatele unui paywall API.
|
||||
--
|
||||
-- Strategia ingest:
|
||||
-- - Faza 1 (THIS): schema + importer CSV pentru data.gov.ro Q1-2016 snapshot.
|
||||
-- ~140K rânduri reale, validează schema end-to-end.
|
||||
-- - Faza 2 (TODO): scraper cu captcha-solver extern (anti-captcha.com /
|
||||
-- 2captcha) pentru anaf.ro/restante/ live + arhive trimestriale dacă găsim.
|
||||
-- - Faza 3: integrare cu firms.entities pentru profile badges + recipe-uri.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS anaf;
|
||||
|
||||
-- ── Tabelă principală: datornici per (CUI × dată publicare) ─────────────────
|
||||
CREATE TABLE IF NOT EXISTS anaf.datornici (
|
||||
cui text NOT NULL, -- fără prefix RO
|
||||
name text, -- denumirea contribuabilului
|
||||
judet text, -- 2026: nu e disponibil în CSV-urile data.gov.ro Q1-2016, dar e expus în XHTML live
|
||||
publication_date date NOT NULL, -- prima zi a trimestrului (2016-01-01 = T1 2016)
|
||||
period_label text NOT NULL, -- 'T1 2016' / 'T2 2024' etc.
|
||||
debtor_category text, -- 'mari' | 'mijlocii' | 'mici' | 'institutii_publice' | 'persoane_fizice'
|
||||
debt_total numeric(20,2), -- suma RON (principal + accesorii la toate cele 4 bugete)
|
||||
debt_principal numeric(20,2), -- suma RON (principal la toate cele 4 bugete)
|
||||
debt_penalty numeric(20,2), -- suma RON (accesorii la toate cele 4 bugete)
|
||||
debt_contested numeric(20,2), -- suma RON contestată (necontestată = total - contested)
|
||||
-- Detaliu per buget (păstrăm pentru forensică, deși total/principal/penalty
|
||||
-- agregat e suficient pentru majoritatea recipes):
|
||||
budget_state_principal numeric(20,2),
|
||||
budget_state_penalty numeric(20,2),
|
||||
budget_state_contested numeric(20,2),
|
||||
budget_social_principal numeric(20,2),
|
||||
budget_social_penalty numeric(20,2),
|
||||
budget_social_contested numeric(20,2),
|
||||
budget_unemployment_principal numeric(20,2),
|
||||
budget_unemployment_penalty numeric(20,2),
|
||||
budget_unemployment_contested numeric(20,2),
|
||||
budget_health_principal numeric(20,2),
|
||||
budget_health_penalty numeric(20,2),
|
||||
budget_health_contested numeric(20,2),
|
||||
source_url text, -- URL original al CSV / XHTML
|
||||
fetched_at timestamptz DEFAULT now(),
|
||||
PRIMARY KEY (cui, publication_date)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_anaf_datornici_cui ON anaf.datornici(cui);
|
||||
CREATE INDEX IF NOT EXISTS idx_anaf_datornici_pub_date ON anaf.datornici(publication_date DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_anaf_datornici_total ON anaf.datornici(debt_total DESC NULLS LAST);
|
||||
CREATE INDEX IF NOT EXISTS idx_anaf_datornici_category ON anaf.datornici(debtor_category);
|
||||
|
||||
-- ── Lista albă: firme FĂRĂ obligații restante (eligibile la SEAP) ───────────
|
||||
-- Se publică separat la /restante/listaalba.xhtml. Mai puțin acționabilă, dar
|
||||
-- utilă pentru a confirma negativ "firma X NU avea datorii când a câștigat
|
||||
-- contractul Y" (când lipsește din .datornici nu înseamnă neapărat că nu
|
||||
-- avea — poate fi sub plafon).
|
||||
CREATE TABLE IF NOT EXISTS anaf.lista_alba (
|
||||
cui text NOT NULL,
|
||||
name text,
|
||||
publication_date date NOT NULL,
|
||||
period_label text NOT NULL,
|
||||
source_url text,
|
||||
fetched_at timestamptz DEFAULT now(),
|
||||
PRIMARY KEY (cui, publication_date)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_anaf_lista_alba_cui ON anaf.lista_alba(cui);
|
||||
CREATE INDEX IF NOT EXISTS idx_anaf_lista_alba_pub_date ON anaf.lista_alba(publication_date DESC);
|
||||
|
||||
-- ── View: cea mai recentă publicare per CUI (latest debt status) ────────────
|
||||
-- Latest published row per CUI: DISTINCT ON keeps the first row of each cui
-- group, and the ORDER BY puts the newest publication_date first.
CREATE OR REPLACE VIEW anaf.datornici_latest AS
SELECT DISTINCT ON (d.cui)
    d.cui,
    d.name,
    d.judet,
    d.publication_date,
    d.period_label,
    d.debtor_category,
    d.debt_total,
    d.debt_principal,
    d.debt_penalty,
    d.debt_contested
FROM anaf.datornici AS d
ORDER BY
    d.cui,
    d.publication_date DESC;
|
||||
|
||||
COMMENT ON SCHEMA anaf IS 'ANAF (Agenția Națională de Administrare Fiscală) public registries';
|
||||
COMMENT ON TABLE anaf.datornici IS 'Lista contribuabililor cu obligații restante, publicată trimestrial (Ord. 558/2016)';
|
||||
COMMENT ON TABLE anaf.lista_alba IS 'Lista albă: contribuabili FĂRĂ obligații restante la data publicării';
|
||||
COMMENT ON VIEW anaf.datornici_latest IS 'Cel mai recent snapshot al datoriilor per CUI';
|
||||
@@ -0,0 +1,224 @@
|
||||
-- 026_bugetar.sql
|
||||
-- Transparență Bugetară MFP — execuții bugetare ale entităților publice.
|
||||
-- Source primar: https://mfinante.gov.ro/apps/transparenta-bugetara/index.htm
|
||||
-- → redirecționează spre aplicația activă: extranet.anaf.mfinante.gov.ro/anaf/extranet/EXECUTIEBUGETARA
|
||||
--
|
||||
-- KILLER USE CASE: cross-reference cu seap.announcements pentru a calcula
|
||||
-- "ponderea unui furnizor SEAP în cheltuielile totale ale unei UAT" — adică
|
||||
-- "Comuna X a cheltuit 80% din buget cu 1 furnizor". Plus "Capitole bugetare
|
||||
-- consumate disproporționat de 1 firmă" (cap 51 admin pub × top vendor).
|
||||
--
|
||||
-- Volum estimat: ~13.700 entități × 12 luni × 5 ani × ~30 linii/raport
|
||||
-- ≈ 25M rânduri pentru detaliat (FXB-EXB-900). Pentru raport
|
||||
-- COFOG3 agregat (FXB-EXB-901), ~822K rânduri pentru perioada
|
||||
-- 2021-2025 la nivel ordonator principal.
|
||||
--
|
||||
-- ─── LIMITĂRI SURSE DE DATE (2026-05-09) ────────────────────────────────────
|
||||
-- 1. Aplicația oficială (extranet.anaf.mfinante.gov.ro/EXECUTIEBUGETARA) e
|
||||
-- IBM WebSphere Portal cu CAPTCHA imagine pe fiecare căutare. Endpoint-ul
|
||||
-- de căutare e POST cu un URL stateful + `seccode`. Nu există URL deep
|
||||
-- direct per (CUI, perioadă) fără sesiune + captcha solver.
|
||||
-- 2. Există un endpoint de autocomplete EXPUS fără captcha care întoarce TOATE
|
||||
-- denumirile entităților publice per (sector_bugetar, județ):
|
||||
-- POST /Rapoarte_Forexe/.../res/id=populateEpAJAX/.../
|
||||
-- data: idSector=02&idJudet=CJ
|
||||
-- response: ["BIBLIOTECA JUDETEANA OCTAVIAN GOGA CLUJ", ...] (JSON array).
|
||||
-- Util pentru a construi universul ~13.7K entități, dar NU întoarce CUI-urile.
|
||||
-- 3. Fișiere XML/XLSX detaliate (FXB-EXB-900) se descarcă DOAR dintr-un raport
|
||||
-- de rezultate randat după captcha. Hash-uri de URL sunt valide ~minute.
|
||||
-- 4. data.gov.ro publică doar agregate naționale (BGC = Bugetul General
|
||||
-- Consolidat) ca XLS lunar — NU per-CUI.
|
||||
-- 5. Multe primării publică propriile execuții pe site-urile lor (PDF/XLSX),
|
||||
-- dar formatele variază — Plan B pentru top-N municipii.
|
||||
--
|
||||
-- ─── STRATEGIA INGEST (faze) ────────────────────────────────────────────────
|
||||
-- Faza 1 (THIS migration): schema completă pregătită pentru parser FXB-EXB-900
|
||||
-- + tabelă auxiliară bugetar.entitate cu universul EP din autocomplete API
|
||||
-- (~13.7K rânduri × 5 sectoare × 42 județe ≈ 30K seed-uri ce vor fi dedupe-uite).
|
||||
-- + descrierea formatelor XML/XLSX (din PDF-urile MFP "Structura fisier XML
|
||||
-- raport FXB-900/901/905") așa încât parserul să fie deterministic.
|
||||
-- Faza 2 (TODO ~80h): integrare captcha solver (2captcha/anti-captcha) +
|
||||
-- crawler asincron care urmează (sector × județ × tipRaport × an × lună).
|
||||
-- Faza 3: cross-link cu firms.entities + seap.announcements pentru recipe-uri
|
||||
-- "buget vs procurement".
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS bugetar;
|
||||
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
-- Tabel principal: linii de execuție bugetară per (entitate × perioadă × clasificație)
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
-- Format aliniat pe FXB-EXB-900 (raport detaliat per entitate publică) +
|
||||
-- agregatele FXB-EXB-901 (ordonator principal) și FXB-EXB-905 (ordonator secundar).
|
||||
-- Câmpurile clasificării bugetare urmăresc structura ROMC (Clasificația
|
||||
-- Bugetară Românească): Capitol → Subcapitol → Paragraf → Articol → Aliniat.
|
||||
-- Budget-execution lines: one row per
-- (entity × period × report type × side × budget classification).
CREATE TABLE IF NOT EXISTS bugetar.executie (
    id bigserial PRIMARY KEY,

    -- Entity identification
    cui text NOT NULL,                  -- CUI of the public entity (no RO prefix)
    cui_ordonator text,                 -- CUI of the main budget holder (ordonator principal); may differ from cui
    entity_name text,                   -- entity name at reporting time
    sector_bugetar text,                -- '01' BS, '02' BL, '03' BASS, '04' SOMAJ, '05' FNUASS
    judet text,                         -- 2-letter county code (AB, CJ, B, ...)

    -- Reporting period
    period text NOT NULL,               -- 'YYYY-MM' (cumulative from 1 Jan to the end of that month)
    period_year smallint NOT NULL,
    period_month smallint NOT NULL,     -- 1..12

    -- Source report type
    raport_tip text NOT NULL,           -- 'FXB-EXB-900' | 'FXB-EXB-901' | 'FXB-EXB-905' | 'FXB-RBG-003' | 'FXB-EXB-902'
    raport_nivel text,                  -- 'entitate' | 'ordonator_principal' | 'ordonator_secundar'

    -- Budget classification (5 levels: capitol → subcapitol → paragraf → articol → aliniat)
    side text NOT NULL,                 -- 'venituri' | 'cheltuieli'
    capitol text,                       -- 4 digits, e.g. '5101' = Autorități publice
    subcapitol text,                    -- 6 digits, e.g. '510102'
    paragraf text,                      -- 8 digits
    articol text,                       -- 10 digits, e.g. '5101010101'
    aliniat text,                       -- 12 digits (rarely used)
    classification_label text,          -- human-readable label
    cofog3 text,                        -- COFOG3 code (Classification of Functions of Government, aggregated)

    -- Key amounts (all in RON, cumulative from 1 Jan).
    -- Meaning per FXB-EXB-900:
    --   credite_bug_aprobate_ini = budget initially approved for the current year
    --   credite_bug_aprobate_def = final approved budget (after rectifications) at period end
    --   credite_bug_trimestru    = cumulative quarterly budget credits
    --   angajamente_bugetare     = committed amounts (FXB-EXB-902)
    --   angajamente_legale       = amounts committed through firm contracts
    --   plati_efectuate          = payments made by period end (= "cumulative execution")
    --   incasari_realizate       = for side='venituri', amounts collected
    credite_bug_aprobate_ini numeric(20,2),
    credite_bug_aprobate_def numeric(20,2),
    credite_bug_trimestru numeric(20,2),
    angajamente_bugetare numeric(20,2),
    angajamente_legale numeric(20,2),
    plati_efectuate numeric(20,2),
    incasari_realizate numeric(20,2),
    -- "Primary" amount for simple queries: plati_efectuate when side='cheltuieli',
    -- incasari_realizate when side='venituri'. Computed by the writer at INSERT.
    suma_executat numeric(20,2),

    -- Source metadata
    source_url text,                    -- original URL of the XML/XLSX file
    source_hash text,                   -- sha256(URL + filename), for de-duplication
    fetched_at timestamptz NOT NULL DEFAULT now(),

    -- Uniqueness of a line. The classification columns and sector_bugetar are
    -- nullable, and a plain UNIQUE treats NULLs as distinct, so it would never
    -- reject a re-ingested row with e.g. aliniat IS NULL (the common case).
    -- NULLS NOT DISTINCT (PostgreSQL 15+) makes NULLs compare equal here, so the
    -- constraint actually guarantees idempotent ingest.
    -- Because of IF NOT EXISTS, databases that already have this table keep the
    -- old constraint and need an explicit ALTER TABLE to pick this up.
    CONSTRAINT uq_bugetar_executie_full UNIQUE NULLS NOT DISTINCT
        (cui, period, raport_tip, side, capitol, subcapitol, paragraf, articol, aliniat, sector_bugetar)
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_cui_year ON bugetar.executie(cui, period_year DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_period ON bugetar.executie(period_year, period_month);
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_capitol ON bugetar.executie(capitol) WHERE capitol IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_judet_sector ON bugetar.executie(judet, sector_bugetar);
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_side_amount ON bugetar.executie(side, suma_executat DESC NULLS LAST);
|
||||
|
||||
COMMENT ON TABLE bugetar.executie IS
|
||||
'Linii de execuție bugetară (FXB-EXB-900/901/905). Un rând per (entitate, perioadă, clasificație, side).';
|
||||
COMMENT ON COLUMN bugetar.executie.suma_executat IS
|
||||
'Suma "principală" pentru queries: plati_efectuate la cheltuieli, incasari_realizate la venituri.';
|
||||
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
-- Universul entităților publice raportoare (descoperit din autocomplete API)
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
-- Pasul 1 al ingest-ului: enumeră (sector_bugetar × județ) → descarcă lista
|
||||
-- denumirilor de entități publice. Apoi fuzzy-match cu firms.entities pentru a
|
||||
-- atașa CUI. Asta deblochează crawl-ul Fazei 2 (când avem captcha solver).
|
||||
CREATE TABLE IF NOT EXISTS bugetar.entitate (
|
||||
id bigserial PRIMARY KEY,
|
||||
entity_name text NOT NULL, -- denumire raw din MFP (case-sensitive)
|
||||
sector_bugetar text NOT NULL, -- '01' .. '05'
|
||||
judet text NOT NULL, -- cod 2 litere
|
||||
cui text, -- atașat post-hoc prin fuzzy match
|
||||
cui_match_score numeric(5,2), -- 0..1 confidence pentru matching
|
||||
cui_match_method text, -- 'exact' | 'fuzzy_anaf' | 'manual'
|
||||
|
||||
is_ordonator_principal boolean DEFAULT false, -- true dacă apare în lista populateOcpAJAX
|
||||
raport_count integer DEFAULT 0, -- număr de raport-perioade descărcate cu succes
|
||||
last_fetched_period text, -- 'YYYY-MM' al ultimei perioade ingest-uate
|
||||
|
||||
fetched_at timestamptz NOT NULL DEFAULT now(),
|
||||
updated_at timestamptz NOT NULL DEFAULT now(),
|
||||
|
||||
UNIQUE (entity_name, sector_bugetar, judet)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_entitate_cui ON bugetar.entitate(cui) WHERE cui IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_entitate_judet ON bugetar.entitate(judet, sector_bugetar);
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_entitate_name_trgm ON bugetar.entitate USING gin(entity_name gin_trgm_ops);
|
||||
|
||||
COMMENT ON TABLE bugetar.entitate IS
|
||||
'Universul entităților publice raportoare descoperit din autocomplete API MFP. CUI-ul se atașează post-hoc prin fuzzy match cu firms.entities.';
|
||||
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
-- Job tracking — pentru reluare crawl la întreruperi
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
-- Crawl bookkeeping: one row per (cui, period, raport_tip) download/parse job,
-- with its status, attempt counter and last error.
CREATE TABLE IF NOT EXISTS bugetar.crawl_job (
|
||||
id bigserial PRIMARY KEY,
|
||||
cui text,
|
||||
entity_name text,
|
||||
period text NOT NULL, -- 'YYYY-MM'
|
||||
raport_tip text NOT NULL,
|
||||
status text NOT NULL DEFAULT 'pending', -- 'pending' | 'fetched' | 'parsed' | 'failed' | 'no_data'
|
||||
attempts smallint NOT NULL DEFAULT 0,
|
||||
last_error text,
|
||||
rows_inserted integer,
|
||||
fetched_at timestamptz,
|
||||
parsed_at timestamptz,
|
||||
updated_at timestamptz NOT NULL DEFAULT now(),
|
||||
-- NOTE(review): cui is nullable and NULLs are distinct under a plain UNIQUE, so jobs queued with cui IS NULL are never de-duplicated by this constraint — confirm whether entity_name should be part of the key for such rows.
UNIQUE (cui, period, raport_tip)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_bugetar_crawl_status ON bugetar.crawl_job(status, period);
|
||||
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
-- Materialized views pentru dashboard rapid
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
-- Sumar per (CUI × an): venituri totale + cheltuieli totale + nr luni raportate.
|
||||
-- Yearly revenue / expenditure summary per (CUI × year).
--
-- Amounts in bugetar.executie are cumulative from 1 January up to the end of the
-- reported month, so adding up every month of a year would count January twelve
-- times, February eleven times, and so on. The totals therefore use only the
-- latest reported month of each (cui, period_year), which already carries the
-- year-to-date figure. months_reported still counts every period seen.
--
-- NOTE(review): rows from all raport_tip values and all classification levels
-- are still summed together; if one entity is ingested from several report types
-- or the reports contain subtotal lines, the totals overlap — confirm with the
-- parser and filter here if needed.
-- Because of IF NOT EXISTS, an already-created view must be dropped and
-- re-created to pick up this definition.
CREATE MATERIALIZED VIEW IF NOT EXISTS bugetar.mv_per_cui_year AS
WITH last_period AS (
    -- Latest reported month per (entity, year).
    SELECT
        cui,
        period_year,
        MAX(period_month) AS last_month
    FROM bugetar.executie
    GROUP BY cui, period_year
)
SELECT
    e.cui,
    e.period_year,
    SUM(e.suma_executat)
        FILTER (WHERE e.side = 'venituri' AND e.period_month = lp.last_month)   AS venituri_total,
    SUM(e.suma_executat)
        FILTER (WHERE e.side = 'cheltuieli' AND e.period_month = lp.last_month) AS cheltuieli_total,
    COUNT(DISTINCT e.period)    AS months_reported,
    MAX(e.entity_name)          AS entity_name_sample,
    MAX(e.judet)                AS judet,
    MAX(e.sector_bugetar)       AS sector_bugetar
FROM bugetar.executie AS e
INNER JOIN last_period AS lp
    ON lp.cui = e.cui
   AND lp.period_year = e.period_year
GROUP BY e.cui, e.period_year;
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_bugetar_cui_year ON bugetar.mv_per_cui_year(cui, period_year);
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_bugetar_year_chelt ON bugetar.mv_per_cui_year(period_year, cheltuieli_total DESC NULLS LAST);
|
||||
|
||||
COMMENT ON MATERIALIZED VIEW bugetar.mv_per_cui_year IS
|
||||
'Sumar venituri+cheltuieli per (CUI × an). Refresh după fiecare ingest batch.';
|
||||
|
||||
-- Sumar per (CUI × an × capitol) — pentru analiza distribuției pe capitole bugetare.
|
||||
-- Summary per (CUI × year × capitol × side).
--
-- Amounts in bugetar.executie are cumulative from 1 January, so summing across
-- all months of a year multiplies the real figure. Only rows from the latest
-- reported month of each (cui, period_year) are aggregated; that month is taken
-- among rows that have a capitol, so a newer capitol-less row cannot empty the
-- result for that entity and year.
--
-- NOTE(review): rows from different raport_tip values and from deeper
-- classification levels under the same capitol are still added together —
-- confirm with the parser whether subtotal lines are stored.
-- Because of IF NOT EXISTS, an already-created view must be dropped and
-- re-created to pick up this definition.
CREATE MATERIALIZED VIEW IF NOT EXISTS bugetar.mv_per_cui_capitol_year AS
WITH last_period AS (
    -- Latest reported month per (entity, year) among classified rows.
    SELECT
        cui,
        period_year,
        MAX(period_month) AS last_month
    FROM bugetar.executie
    WHERE capitol IS NOT NULL
    GROUP BY cui, period_year
)
SELECT
    e.cui,
    e.period_year,
    e.capitol,
    e.side,
    SUM(e.suma_executat)        AS suma_total,
    MAX(e.classification_label) AS capitol_label
FROM bugetar.executie AS e
INNER JOIN last_period AS lp
    ON lp.cui = e.cui
   AND lp.period_year = e.period_year
   AND lp.last_month = e.period_month
WHERE e.capitol IS NOT NULL
GROUP BY e.cui, e.period_year, e.capitol, e.side;
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_bugetar_cui_cap_year
|
||||
ON bugetar.mv_per_cui_capitol_year(cui, period_year, capitol, side);
|
||||
|
||||
COMMENT ON MATERIALIZED VIEW bugetar.mv_per_cui_capitol_year IS
|
||||
'Sumar pe capitol bugetar per (CUI × an). Pentru analiza "pe ce s-au cheltuit banii".';
|
||||
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
-- COMMENTS & schema-level metadata
|
||||
-- ────────────────────────────────────────────────────────────────────────────
|
||||
COMMENT ON SCHEMA bugetar IS
|
||||
'Transparență Bugetară MFP — execuția bugetară lunară a entităților publice. Sursă: https://mfinante.gov.ro/apps/transparenta-bugetara/';
|
||||
@@ -0,0 +1,15 @@
|
||||
-- 027_afir_tip_fond.sql
|
||||
-- Augment fonduri.afir_plati with tip_fond discriminator to host both FEADR
|
||||
-- (project-based development funds) and FEGA (per-hectare direct payments
|
||||
-- to farmers) in the same fact table. Schema is near-identical between the
|
||||
-- two; only specific columns are populated per fund (e.g. fega_op vs feadr_op).
|
||||
--
|
||||
-- Backwards compatible: existing 1.04M rows (2023+2024 FEADR) get tip_fond='FEADR'.
|
||||
|
||||
-- Add the fund discriminator. When the column is actually added here, PostgreSQL
-- fills every existing row with the DEFAULT, so existing rows become 'FEADR'.
ALTER TABLE fonduri.afir_plati
|
||||
ADD COLUMN IF NOT EXISTS tip_fond text NOT NULL DEFAULT 'FEADR';
|
||||
|
||||
-- Backfill rows that still have tip_fond IS NULL. This can only match rows if the
-- column already existed as nullable before this migration (the ADD COLUMN above
-- is then skipped); otherwise it is a no-op.
-- NOTE(review): in that pre-existing case NOT NULL / DEFAULT are not applied by
-- the ALTER above either — confirm whether an explicit SET NOT NULL is wanted.
|
||||
UPDATE fonduri.afir_plati SET tip_fond = 'FEADR' WHERE tip_fond IS NULL;
|
||||
|
||||
-- Composite index on (tip_fond, source_year).
CREATE INDEX IF NOT EXISTS idx_afir_plati_tip_fond ON fonduri.afir_plati(tip_fond, source_year);
|
||||
@@ -0,0 +1,136 @@
|
||||
-- 028_anre.sql
|
||||
-- ANRE — Autoritatea Națională de Reglementare în domeniul Energiei.
|
||||
-- Public license/authorization registries scraped from portal.anre.ro/PublicLists.
|
||||
--
|
||||
-- Sources (all return JSON via Kendo Grid AJAX endpoint, pageSize=99999 returns full):
|
||||
-- 1. /PublicLists/LicenteAutorizatii → ~4,927 licenses (electricitate)
|
||||
-- flat columns: Societate, Sediu, Localitate, Judet, NrLicenta, DataEmitere,
|
||||
-- DataExpirare, Stare, TipAL, TipActivitate, Comentariu
|
||||
-- 2. /PublicLists/LicenteAutorizatiiGN → ~353 licenses (gaze naturale)
|
||||
-- parent row per company, "Detaliu" is HTML <table> with multiple sub-rows
|
||||
-- (Nr.Document, Tip document, Tip activitate, Localitate, Data emitere,
|
||||
-- Data expirare, Stare, Decizie)
|
||||
-- 3. /PublicLists/Atestate → ~9,745 atestate
|
||||
-- parent row per company, "Detaliu" HTML <table> w/ Nr.atestat, Tip tarif,
|
||||
-- Data emitere, Data expirare, Stare
|
||||
-- 4. /PublicLists/AutorizatiiElectricieniAutorizati → ~101,529 electricieni autorizati
|
||||
-- flat: NumePrenume, NrRegistru, Localitate, Judet, NrAutorizare,
|
||||
-- TarifAutorizare, TipAutorizare, DataExpirare, Stare
|
||||
--
|
||||
-- Cross-source value: anre.licente.titular_cui (resolved via firms.normalize_company_name
|
||||
-- fuzzy match) × seap.announcements.supplier_cui = "energy operators with state contracts".
|
||||
-- Red-flag: company wins energy-related SEAP contract but has no ANRE license.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS anre;
|
||||
|
||||
-- ── 1. Licente & autorizatii (companies) — unified flat ────────────────────
|
||||
-- One row per distinct license document. license_source distinguishes the
|
||||
-- 3 corporate sources (electricitate / gaze / atestate). Detaliu sub-rows
|
||||
-- from GN/atestate are flattened to one row per sub-row. Source position
|
||||
-- (NrCrt) is preserved in raw_json for traceability.
|
||||
-- Corporate ANRE licenses/authorizations, one row per license document.
CREATE TABLE IF NOT EXISTS anre.licente (
    -- SHA-1 hex digest used as the upsert key. The scraper-side recipe is
    -- documented as sha1(license_source|license_no|titular_name|data_emitere|tip_al);
    -- NOTE(review): the COMMENT ON COLUMN below names the last component
    -- "license_type" — presumably the same field (source "TipAL"); confirm.
    id                CHAR(40) PRIMARY KEY,
    license_source    TEXT NOT NULL,          -- 'electricitate' | 'gaze' | 'atestat'
    license_no        TEXT NOT NULL,          -- source "NrLicenta" / "Nr. Document" / "Nr. atestat"
    license_type      TEXT,                   -- "Licenta" / "Autorizatie de Infiintare" / "Confirmare Licenta" / "Atestat"
    license_subtype   TEXT,                   -- source "TipActivitate" / "Tip document" / "Tip tarif" (e.g. "Producere", "Furnizare", "Tarif A1")
    titular_name      TEXT NOT NULL,          -- raw source "Societate"
    titular_name_norm TEXT,                   -- firms.normalize_company_name(titular_name); filled in after insert
    titular_cui       TEXT,                   -- resolved by fuzzy matching; NULL at first
    cui_match_score   NUMERIC(4,3),
    cui_match_method  TEXT,                   -- 'exact_norm' / 'trgm_unique' / 'trgm_judet'
    matched_at        TIMESTAMPTZ,
    sediu             TEXT,                   -- registered address
    localitate        TEXT,
    judet             TEXT,
    telefon_fax       TEXT,
    data_emitere      DATE,
    data_expirare     DATE,
    stare             TEXT,                   -- 'Acordata' / 'Expirata' / 'Retrasa' / 'Suspendata' / 'Incetat valabilitate, sub 1 MW' / etc.
    decizie           TEXT,                   -- e.g. "Nr.Dec. 2223"; gas (GN) source only
    comentariu        TEXT,                   -- electricity source only
    raw_json          JSONB,
    fetched_at        TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Lookup by resolved CUI (only rows that have one).
CREATE INDEX IF NOT EXISTS idx_anre_licente_titular_cui
    ON anre.licente (titular_cui)
    WHERE titular_cui IS NOT NULL;

-- Trigram index on the normalized holder name.
CREATE INDEX IF NOT EXISTS idx_anre_licente_titular_norm_trgm
    ON anre.licente USING gin (titular_name_norm gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_anre_licente_source_stare
    ON anre.licente (license_source, stare);

CREATE INDEX IF NOT EXISTS idx_anre_licente_data_expirare
    ON anre.licente (data_expirare);

CREATE INDEX IF NOT EXISTS idx_anre_licente_subtype
    ON anre.licente (license_subtype);

COMMENT ON TABLE anre.licente
    IS 'ANRE licenses & authorizations (electricitate + gaze + atestate). One row per distinct license document. Source: portal.anre.ro/PublicLists/{LicenteAutorizatii,LicenteAutorizatiiGN,Atestate}.';

COMMENT ON COLUMN anre.licente.id
    IS 'sha1(license_source|license_no|titular_name|data_emitere|license_type) — idempotent upsert key.';

COMMENT ON COLUMN anre.licente.license_source
    IS '"electricitate" / "gaze" / "atestat" — source registry.';
|
||||
|
||||
-- ── 2. Electricieni autorizati (individuals) ───────────────────────────────
|
||||
-- People, not firms. No CUI; we keep just to enable lookups by name.
|
||||
-- Authorized electricians (natural persons). There is no CUI here; the table
-- exists to support lookups by name.
CREATE TABLE IF NOT EXISTS anre.electricieni (
    id             BIGSERIAL PRIMARY KEY,
    nume_prenume   TEXT NOT NULL,
    nr_registru    INTEGER,                 -- source "NrRegistru"
    nr_autorizare  INTEGER,                 -- source "NrAutorizare"; described by the author as the natural unique key
    tip_autorizare TEXT,                    -- e.g. "Autorizare Electricieni"
    tarif          TEXT,                    -- e.g. "Tarif II B" / "Tarif IV"
    localitate     TEXT,
    judet          TEXT,
    telefon_fax    TEXT,
    data_expirare  DATE,
    stare          TEXT,                    -- "Activ" / "Expirat" / "Retras"
    raw_json       JSONB,
    fetched_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- Uniqueness is on the (authorization number, name) pair.
    -- NOTE(review): nr_autorizare is nullable, and NULLs are not treated as
    -- equal by a plain UNIQUE constraint — confirm the scraper always sets it.
    UNIQUE (nr_autorizare, nume_prenume)
);

CREATE INDEX IF NOT EXISTS idx_anre_electricieni_judet
    ON anre.electricieni (judet, stare);

-- Trigram index for fuzzy name search.
CREATE INDEX IF NOT EXISTS idx_anre_electricieni_nume_trgm
    ON anre.electricieni USING gin (nume_prenume gin_trgm_ops);

COMMENT ON TABLE anre.electricieni
    IS 'ANRE — electricieni autorizati (persoane fizice). Source: portal.anre.ro/PublicLists/AutorizatiiElectricieniAutorizati.';
|
||||
|
||||
-- ── 3. Scrape log (mirrors aep.scrape_log convention) ──────────────────────
|
||||
-- One row per scraper run against an ANRE source, with row counters,
-- timing and an optional error message.
CREATE TABLE IF NOT EXISTS anre.scrape_log (
    id            BIGSERIAL PRIMARY KEY,
    scraper       TEXT NOT NULL,                     -- 'electricitate' / 'gaze' / 'atestat' / 'electricieni'
    source_url    TEXT NOT NULL,
    rows_seen     INTEGER NOT NULL DEFAULT 0,
    rows_inserted INTEGER NOT NULL DEFAULT 0,
    rows_updated  INTEGER NOT NULL DEFAULT 0,
    rows_skipped  INTEGER NOT NULL DEFAULT 0,
    duration_ms   INTEGER NOT NULL DEFAULT 0,
    started_at    TIMESTAMPTZ NOT NULL,              -- must be supplied by the writer
    finished_at   TIMESTAMPTZ NOT NULL DEFAULT now(),-- defaults to the insert time
    error         TEXT
);

-- Most-recent-first access to runs.
CREATE INDEX IF NOT EXISTS idx_anre_scrape_log_started
    ON anre.scrape_log (started_at DESC);
|
||||
|
||||
-- ── 4. Materialized view: per-CUI license rollup ───────────────────────────
|
||||
-- Joinable with seap.announcements.supplier_cui to detect licensed-vs-unlicensed
|
||||
-- energy contractors.
|
||||
-- Per-CUI rollup of anre.licente. Only rows with a resolved titular_cui
-- contribute. Status buckets are prefix matches on stare (case-insensitive):
--   nr_active   -> 'Acord%' or 'Activ%'
--   nr_expirate -> 'Expir%'
--   nr_retrase  -> 'Retras%' or 'Suspend%'
CREATE MATERIALIZED VIEW IF NOT EXISTS anre.mv_licente_per_cui AS
SELECT
    l.titular_cui AS cui,
    COUNT(*) AS nr_licente_total,
    COUNT(*) FILTER (WHERE l.license_source = 'electricitate') AS nr_electricitate,
    COUNT(*) FILTER (WHERE l.license_source = 'gaze') AS nr_gaze,
    COUNT(*) FILTER (WHERE l.license_source = 'atestat') AS nr_atestate,
    COUNT(*) FILTER (
        WHERE l.stare ILIKE 'Acord%'
           OR l.stare ILIKE 'Activ%'
    ) AS nr_active,
    COUNT(*) FILTER (WHERE l.stare ILIKE 'Expir%') AS nr_expirate,
    COUNT(*) FILTER (
        WHERE l.stare ILIKE 'Retras%'
           OR l.stare ILIKE 'Suspend%'
    ) AS nr_retrase,
    array_agg(DISTINCT l.license_subtype)
        FILTER (WHERE l.license_subtype IS NOT NULL) AS subtipuri,
    array_agg(DISTINCT l.license_source) AS surse,
    MIN(l.data_emitere) AS prima_emitere,
    MAX(l.data_emitere) AS ultima_emitere,
    MAX(l.data_expirare) AS ultima_expirare
FROM anre.licente AS l
WHERE l.titular_cui IS NOT NULL
GROUP BY l.titular_cui;

-- One row per CUI; the refresh command in the comment below uses CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_anre_mv_licente_per_cui
    ON anre.mv_licente_per_cui (cui);

COMMENT ON MATERIALIZED VIEW anre.mv_licente_per_cui
    IS 'Rollup of ANRE licenses per CUI. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY anre.mv_licente_per_cui';
|
||||
@@ -0,0 +1,131 @@
|
||||
-- 029_ancom.sql
|
||||
-- ANCOM — Autoritatea Națională pentru Administrare și Reglementare în Comunicații.
|
||||
-- Public registry of authorized providers of electronic communications networks
|
||||
-- and services. Source:
|
||||
-- https://www.ancom.ro/reglementare-ro/comunicatii-electronice/
|
||||
-- furnizori-comunicatii-electronice/
|
||||
-- lista-furnizorilor-de-retele-si-servicii-de-comunicatii-autorizati/
|
||||
--
|
||||
-- The list is paginated server-side (10 rows/page, ~57 pages → ~570 furnizori).
|
||||
-- Each row links to a HTML detail page at:
|
||||
-- https://www.ancom.ro/sablon/furnizorinew_23/?id={id}&pid=4186
|
||||
--
|
||||
-- The detail page exposes:
|
||||
-- • Denumire (titular)
|
||||
-- • Adresa, Oras/Comuna, Judet/Sector
|
||||
-- • Cod unic de înregistrare (CUI) — direct, no fuzzy match needed
|
||||
-- • EUID (Registrul Comerțului) — e.g. ROONRC.J16/3108/1992
|
||||
-- • R1..R11 — tipuri de retele (Fire metalice, Coaxial, Fibra optica, Mobil,
|
||||
-- Spectru radio, etc.) cu "Data nasterii dreptului"
|
||||
-- • S1..S12 — tipuri de servicii (Internet la puncte fixe, Voce mobil,
|
||||
-- Comunicații interpersonale, etc.)
|
||||
--
|
||||
-- Cross-source value:
|
||||
-- ancom.operatori.titular_cui × seap.announcements.supplier_cui = furnizori
|
||||
-- telco cu contracte publice. Inverse (anunturi telco CPV 32/64 cu supplier
|
||||
-- NU în ancom.operatori) = potențial neautorizat.
|
||||
--
|
||||
-- Schema layout:
|
||||
-- 1. ancom.operatori — flat row per provider (CUI direct from page)
|
||||
-- 2. ancom.drepturi — long table: 1 row per (operator, R/S code)
|
||||
-- cu data nasterii dreptului. Permite filtrare
|
||||
-- pe tip retea/serviciu (R3=fibra optica etc.)
|
||||
-- 3. ancom.scrape_log — mirrors anre.scrape_log convention
|
||||
-- 4. ancom.mv_operatori_per_cui— rollup pentru join cu seap.announcements
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS ancom;
|
||||
|
||||
-- ── 1. Operatori (furnizori autorizati) ────────────────────────────────────
|
||||
-- One row per ancom.id (the registry numeric id from sablon/furnizorinew_23).
|
||||
-- ancom_id is PK because it's the natural unique key in the registry.
|
||||
-- Authorized ANCOM providers, keyed by the registry's own numeric id.
CREATE TABLE IF NOT EXISTS ancom.operatori (
    ancom_id          INTEGER PRIMARY KEY,                -- the ?id={N} value in the detail URL
    titular_name      TEXT NOT NULL,                      -- raw name from the list table
    titular_name_norm TEXT,                               -- firms.normalize_company_name(); fallback when no CUI matched
    titular_cui       TEXT,                               -- taken directly from the detail page, stored as a string ('3071154')
    cui_match_method  TEXT,                               -- 'direct' (from page) | 'exact_norm' | 'trgm_unique' | 'trgm_judet'
    cui_match_score   NUMERIC(4,3),
    matched_at        TIMESTAMPTZ,
    euid              TEXT,                               -- Trade Register id, e.g. 'ROONRC.J16/3108/1992'
    adresa            TEXT,
    oras              TEXT,
    judet             TEXT,                               -- 'DOLJ', 'SECTOR 1', etc.
    list_judet        TEXT,                               -- county as shown in the list (may differ from the detail page)
    detail_url        TEXT NOT NULL,                      -- canonical URL
    status            TEXT NOT NULL DEFAULT 'autorizat',  -- 'autorizat' | 'radiat' | 'sanctionat' | 'inactiv'
    raw_html_hash     TEXT,                               -- sha1 of the detail HTML body, for change detection
    fetched_at        TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Lookup by CUI (only rows that have one).
CREATE INDEX IF NOT EXISTS idx_ancom_operatori_cui
    ON ancom.operatori (titular_cui)
    WHERE titular_cui IS NOT NULL;

-- Trigram index on the normalized name.
CREATE INDEX IF NOT EXISTS idx_ancom_operatori_name_norm
    ON ancom.operatori USING gin (titular_name_norm gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_ancom_operatori_judet
    ON ancom.operatori (judet);

CREATE INDEX IF NOT EXISTS idx_ancom_operatori_status
    ON ancom.operatori (status);

COMMENT ON TABLE ancom.operatori
    IS 'ANCOM authorized communications providers. One row per ancom_id from registry. Source: ancom.ro/reglementare-ro/.../lista-furnizorilor-...autorizati/';

COMMENT ON COLUMN ancom.operatori.ancom_id
    IS 'Natural unique key from detail URL ?id={N}&pid=4186. Stable across scrapes.';

COMMENT ON COLUMN ancom.operatori.titular_cui
    IS 'CUI direct from detail page "Cod unic de înregistrare". Most rows match — fuzzy fallback used only when missing.';
|
||||
|
||||
-- ── 2. Drepturi (R1..R11 + S1..S12 catalog) ────────────────────────────────
|
||||
-- Long table — one row per (operator, code). Lets us answer:
|
||||
-- "câți furnizori au drept S2 (mobil) activ?"
|
||||
-- "în Cluj câți furnizori au R3 (fibră optică)?"
|
||||
-- Long-format rights table: one row per (operator, right code). Rows are
-- removed automatically when the owning operator is deleted.
CREATE TABLE IF NOT EXISTS ancom.drepturi (
    ancom_id      INTEGER NOT NULL
        REFERENCES ancom.operatori (ancom_id) ON DELETE CASCADE,
    cod           TEXT NOT NULL,   -- 'R1' .. 'R11' | 'S1' .. 'S12'
    tip           TEXT NOT NULL,   -- 'retea' | 'serviciu'
    descriere     TEXT,            -- e.g. 'Fire metalice (DSL)' / 'Internet la puncte fixe'
    data_nasterii DATE,            -- source "Data nasterii dreptului" (date the right came into being)
    PRIMARY KEY (ancom_id, cod)
);

CREATE INDEX IF NOT EXISTS idx_ancom_drepturi_cod
    ON ancom.drepturi (cod);

CREATE INDEX IF NOT EXISTS idx_ancom_drepturi_tip
    ON ancom.drepturi (tip);

COMMENT ON TABLE ancom.drepturi
    IS 'Drepturile fiecarui furnizor — R1..R11 (retele) + S1..S12 (servicii) cu data nasterii dreptului. Long table, one row per (operator, code).';
|
||||
|
||||
-- ── 3. Scrape log ──────────────────────────────────────────────────────────
|
||||
-- One row per ANCOM scraper run, with row counters, timing and an optional
-- error message.
CREATE TABLE IF NOT EXISTS ancom.scrape_log (
    id            BIGSERIAL PRIMARY KEY,
    scraper       TEXT NOT NULL,                     -- 'autorizati' | 'radiati' | 'sanctionati'
    source_url    TEXT NOT NULL,
    rows_seen     INTEGER NOT NULL DEFAULT 0,
    rows_inserted INTEGER NOT NULL DEFAULT 0,
    rows_updated  INTEGER NOT NULL DEFAULT 0,
    rows_skipped  INTEGER NOT NULL DEFAULT 0,
    duration_ms   INTEGER NOT NULL DEFAULT 0,
    started_at    TIMESTAMPTZ NOT NULL,              -- must be supplied by the writer
    finished_at   TIMESTAMPTZ NOT NULL DEFAULT now(),-- defaults to the insert time
    error         TEXT
);

-- Most-recent-first access to runs.
CREATE INDEX IF NOT EXISTS idx_ancom_scrape_log_started
    ON ancom.scrape_log (started_at DESC);
|
||||
|
||||
-- ── 4. Per-CUI rollup (joinable with seap.announcements.supplier_cui) ──────
|
||||
-- Per-CUI rollup of ANCOM operators and their rights (ancom.drepturi).
-- Only operators with a non-NULL titular_cui contribute.
--
-- The LEFT JOIN to ancom.drepturi yields one row per (operator, right), so
-- anything that must count operators has to de-duplicate on o.ancom_id:
--   * nr_autorizatii uses COUNT(DISTINCT o.ancom_id); a plain COUNT(*) would
--     count join rows (operators × rights) and overstate the figure.
--   * the are_* flags on d.cod are wrapped in COALESCE(..., false) because
--     bool_or() returns NULL when an operator has no rights rows at all
--     (every d.cod is NULL after the LEFT JOIN).
-- retele / servicii stay NULL (not empty arrays) when no matching right exists.
--
-- NOTE: because of IF NOT EXISTS, an already-created view keeps its old
-- definition; it has to be dropped and recreated to pick this one up.
CREATE MATERIALIZED VIEW IF NOT EXISTS ancom.mv_operatori_per_cui AS
SELECT
    o.titular_cui                                            AS cui,
    COUNT(DISTINCT o.ancom_id)                               AS nr_autorizatii,
    array_agg(DISTINCT o.ancom_id ORDER BY o.ancom_id)       AS ancom_ids,
    array_agg(DISTINCT d.cod) FILTER (WHERE d.tip = 'retea')    AS retele,
    array_agg(DISTINCT d.cod) FILTER (WHERE d.tip = 'serviciu') AS servicii,
    COALESCE(bool_or(d.cod = 'S1'), false)                   AS are_internet_fix,
    COALESCE(bool_or(d.cod = 'S2'), false)                   AS are_mobil,
    COALESCE(bool_or(d.cod = 'R3'), false)                   AS are_fibra,
    bool_or(o.status = 'autorizat')                          AS are_status_activ,
    MIN(d.data_nasterii)                                     AS prima_autorizare,
    MAX(d.data_nasterii)                                     AS ultima_autorizare,
    MAX(o.fetched_at)                                        AS ultima_actualizare
FROM ancom.operatori AS o
LEFT JOIN ancom.drepturi AS d
    ON d.ancom_id = o.ancom_id
WHERE o.titular_cui IS NOT NULL
GROUP BY o.titular_cui;
|
||||
|
||||
-- One row per CUI; the refresh command in the comment below uses CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_ancom_mv_per_cui
    ON ancom.mv_operatori_per_cui (cui);

COMMENT ON MATERIALIZED VIEW ancom.mv_operatori_per_cui
    IS 'Rollup ANCOM per CUI (autorizatii + tipuri de retele/servicii). Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY ancom.mv_operatori_per_cui';
|
||||
@@ -0,0 +1,211 @@
|
||||
-- 030_ani_schema.sql
|
||||
-- ANI declarații de avere și interese — flagship transparency feature.
|
||||
--
|
||||
-- Source: declaratii.integritate.eu (e-DAI 2022→), old-declaratii.integritate.eu
|
||||
-- (archive 2008-2022). Public by Law 176/2010, GDPR-safe (no CNP stored).
|
||||
--
|
||||
-- ~1.3M PDF declarations of Romanian public officials. Cross-references
|
||||
-- politicians × firms-they-own × procurement-contracts (firms.entities, seap.*).
|
||||
--
|
||||
-- See ANI-PLAN.md for full architecture, volume estimates, and rollout plan.
|
||||
-- This file = Stage 0 (schema only, no data).
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS ani;
|
||||
GRANT USAGE ON SCHEMA ani TO PUBLIC;
|
||||
|
||||
|
||||
-- ── ani.officials ──────────────────────────────────────────────────────────
|
||||
-- Distinct demnitar/funcționar public. Filled by Stage 4 (entity resolution),
|
||||
-- not by the listing scraper. ani.declaratii.official_id is nullable until
|
||||
-- dedup runs.
|
||||
-- Distinct public officials. Rows are produced by entity resolution
-- (Stage 4), not by the listing scraper.
CREATE TABLE IF NOT EXISTS ani.officials (
    id                BIGSERIAL PRIMARY KEY,
    normalized_name   TEXT NOT NULL,            -- lower(unaccent(name)), collapsed
    display_name      TEXT NOT NULL,            -- e.g. "Popescu Ioan-Vasile"
    cnp_hash          CHAR(64),                 -- SHA-256, only when extractable (rare)
    first_seen_year   SMALLINT,                 -- min(declaration year)
    last_seen_year    SMALLINT,                 -- max(declaration year)
    slug              TEXT UNIQUE,              -- e.g. "popescu-ioan-vasile" plus a suffix
    primary_function  TEXT,                     -- most frequent function
    primary_judet     TEXT,                     -- most frequent county
    declaration_count INTEGER DEFAULT 0,        -- stored count, for the UI
    created_at        TIMESTAMPTZ DEFAULT now()
);

-- Exact lookup on the normalized name.
CREATE INDEX IF NOT EXISTS idx_officials_norm_name
    ON ani.officials (normalized_name);

-- Trigram index on the same column, for fuzzy matching.
CREATE INDEX IF NOT EXISTS idx_officials_norm_name_trgm
    ON ani.officials USING gin (normalized_name gin_trgm_ops);
|
||||
|
||||
|
||||
-- ── ani.declaratii ─────────────────────────────────────────────────────────
|
||||
-- One row per PDF declaration. Listing scraper fills the metadata; PDF
|
||||
-- downloader fills pdf_path + pdf_sha256; parser fills parse_status.
|
||||
-- One row per PDF declaration. Metadata comes from the listing scraper,
-- pdf_path / pdf_sha256 from the PDF downloader, parse_status from the parser.
CREATE TABLE IF NOT EXISTS ani.declaratii (
    id                BIGSERIAL PRIMARY KEY,
    -- Resolved official; nullable, and reset to NULL if that official is deleted.
    official_id       BIGINT REFERENCES ani.officials (id) ON DELETE SET NULL,

    -- Values exactly as the portal listing reports them (before resolution).
    raw_official_name TEXT NOT NULL,
    raw_institution   TEXT,
    raw_function      TEXT,
    raw_localitate    TEXT,
    raw_judet         TEXT,

    -- Declaration details.
    year              SMALLINT NOT NULL,
    declaration_type  TEXT NOT NULL
        CHECK (declaration_type IN ('avere', 'interese', 'avere+interese')),
    submission_kind   TEXT
        CHECK (
            submission_kind IN (
                'anuala', 'numire-functie', 'incetare-functie',
                'rectificativa', 'periodica', 'altele'
            )
            OR submission_kind IS NULL
        ),
    data_completare   DATE,

    -- Where the record came from (which portal, which id there).
    source_portal     TEXT NOT NULL
        CHECK (source_portal IN ('old', 'new', 'depozitar')),
    source_url        TEXT NOT NULL,
    source_id         TEXT,                 -- uniqueIdentifier (old portal) / _id (new portal)

    -- Stored PDF.
    pdf_path          TEXT,                 -- relative to /opt/vreaudigital-data/ani
    pdf_sha256        CHAR(64),
    pdf_size_bytes    INTEGER,
    fetched_at        TIMESTAMPTZ,

    -- Parser bookkeeping.
    parsed_at         TIMESTAMPTZ,
    parse_status      TEXT DEFAULT 'pending'
        CHECK (
            parse_status IN (
                'pending', 'ok', 'ocr_required', 'parse_failed',
                'template_unknown', 'download_failed'
            )
        ),
    parse_template    TEXT,                 -- '2008-2010' | '2011-2016' | '2017+' | 'edai'
    parse_error       TEXT,
    inserted_at       TIMESTAMPTZ DEFAULT now()
);

-- Primary dedup key: at most one declaration per (portal, source_id),
-- enforced only for rows that have a source_id.
CREATE UNIQUE INDEX IF NOT EXISTS idx_declaratii_source
    ON ani.declaratii (source_portal, source_id)
    WHERE source_id IS NOT NULL;

-- Content-hash dedup: the same PDF uploaded again under a different id.
CREATE UNIQUE INDEX IF NOT EXISTS idx_declaratii_sha
    ON ani.declaratii (pdf_sha256)
    WHERE pdf_sha256 IS NOT NULL;

-- Declarations of a resolved official, newest year first.
CREATE INDEX IF NOT EXISTS idx_declaratii_official
    ON ani.declaratii (official_id, year DESC)
    WHERE official_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_declaratii_year
    ON ani.declaratii (year DESC, declaration_type);

-- Partial index covering only rows still waiting for parsing or OCR.
CREATE INDEX IF NOT EXISTS idx_declaratii_pending
    ON ani.declaratii (parse_status)
    WHERE parse_status IN ('pending','ocr_required');

-- Trigram indexes on the raw name and institution.
CREATE INDEX IF NOT EXISTS idx_declaratii_raw_name_trgm
    ON ani.declaratii USING gin (raw_official_name gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_declaratii_raw_inst_trgm
    ON ani.declaratii USING gin (raw_institution gin_trgm_ops);
|
||||
|
||||
|
||||
-- ── ani.bunuri ─────────────────────────────────────────────────────────────
|
||||
-- Sections I (imobile) + II (mobile). raw_row_text always preserved for
|
||||
-- audit / debug.
|
||||
-- Declared assets: Section I (real estate) and Section II (movable goods).
-- Rows are deleted together with their declaration.
CREATE TABLE IF NOT EXISTS ani.bunuri (
    id             BIGSERIAL PRIMARY KEY,
    declaration_id BIGINT NOT NULL
        REFERENCES ani.declaratii (id) ON DELETE CASCADE,
    category       TEXT NOT NULL
        CHECK (
            category IN (
                'imobil-teren', 'imobil-cladire', 'mobil-vehicul',
                'mobil-bijuterii', 'mobil-tablouri-arta', 'mobil-altele'
            )
        ),
    subcategory    TEXT,                    -- e.g. "agricol" / "intravilan" / "apartament"
    localitate     TEXT,
    judet          TEXT,
    tara           TEXT DEFAULT 'România',
    year_acquired  SMALLINT,
    mode_acquired  TEXT,                    -- e.g. "cumparare" / "mostenire" / "donatie"
    area_sqm       NUMERIC,
    share_pct      NUMERIC,                 -- 1.0 = full ownership
    co_owner       TEXT,
    value_lei      NUMERIC,
    value_currency TEXT DEFAULT 'RON',
    raw_row_text   TEXT                     -- original row text, kept for audit / debugging
);

CREATE INDEX IF NOT EXISTS idx_bunuri_decl
    ON ani.bunuri (declaration_id);

CREATE INDEX IF NOT EXISTS idx_bunuri_judet
    ON ani.bunuri (judet)
    WHERE judet IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_bunuri_category
    ON ani.bunuri (category);
|
||||
|
||||
|
||||
-- ── ani.shareholdings ──────────────────────────────────────────────────────
|
||||
-- Section IX (firme deținute / asociate). THE flagship table — joins to
|
||||
-- firms.entities via firm_cui (resolved in Stage 4) and to seap.announcements
|
||||
-- via that CUI for "politician-with-firm-supplier-to-state" recipes.
|
||||
-- Section IX: firms an official owns or is associated with. One row per
-- firm line in a declaration; rows are deleted together with the declaration.
-- firm_cui and the firm_match_* columns are filled in later by the matching
-- step; until then they are NULL.
CREATE TABLE IF NOT EXISTS ani.shareholdings (
    id                BIGSERIAL PRIMARY KEY,
    declaration_id    BIGINT NOT NULL
        REFERENCES ani.declaratii (id) ON DELETE CASCADE,
    firm_name_raw     TEXT NOT NULL,        -- firm name exactly as read from the PDF
    firm_cui          TEXT,                 -- resolved later
    firm_match_score  REAL,                 -- pg_trgm similarity
    firm_match_method TEXT
        CHECK (
            firm_match_method IN ('exact_name', 'trgm', 'manual', 'unmatched')
            OR firm_match_method IS NULL
        ),
    matched_at        TIMESTAMPTZ,
    role              TEXT,                 -- e.g. "actionar" / "asociat" / "administrator" / "membru CA"
    share_pct         NUMERIC,
    value_lei         NUMERIC,
    -- Entity category. 'ong' is the intended lowercase value, consistent with
    -- its sibling values; the original list only contained the mis-cased
    -- 'oNG', which made 'ong' fail the check. 'oNG' is still accepted so any
    -- writer that adopted the misspelling keeps working.
    category          TEXT
        CHECK (
            category IN (
                'societate', 'asociatie', 'fundatie', 'cooperativa',
                'ong', 'oNG', 'altele'
            )
            OR category IS NULL
        ),
    raw_row_text      TEXT                  -- original row text
);
|
||||
|
||||
-- All shareholdings of one declaration.
CREATE INDEX IF NOT EXISTS idx_share_decl
    ON ani.shareholdings (declaration_id);

-- Lookup by resolved firm CUI (only rows that have one).
CREATE INDEX IF NOT EXISTS idx_share_cui
    ON ani.shareholdings (firm_cui)
    WHERE firm_cui IS NOT NULL;

-- Trigram index on the raw firm name.
CREATE INDEX IF NOT EXISTS idx_share_name_trgm
    ON ani.shareholdings USING gin (firm_name_raw gin_trgm_ops);

-- Partial index covering rows that have no match yet (method NULL or 'unmatched').
CREATE INDEX IF NOT EXISTS idx_share_unmatched
    ON ani.shareholdings (firm_match_method)
    WHERE firm_match_method IS NULL
       OR firm_match_method = 'unmatched';
|
||||
|
||||
|
||||
-- ── ani.functii ────────────────────────────────────────────────────────────
|
||||
-- Section VIII — funcții publice și private. Joinable to seap.cui_authority
|
||||
-- (when is_public + institution_cui matches an authority) and firms.entities
|
||||
-- (when is_public = false).
|
||||
-- Section VIII: public and private positions held. Rows are deleted together
-- with their declaration.
CREATE TABLE IF NOT EXISTS ani.functii (
    id               BIGSERIAL PRIMARY KEY,
    declaration_id   BIGINT NOT NULL
        REFERENCES ani.declaratii (id) ON DELETE CASCADE,
    is_public        BOOLEAN,
    function_name    TEXT NOT NULL,
    institution_name TEXT NOT NULL,
    institution_cui  TEXT,              -- resolved later
    start_year       SMALLINT,
    end_year         SMALLINT,          -- NULL while the position is still held
    salary_lei       NUMERIC,           -- annual income from this position
    raw_row_text     TEXT
);

CREATE INDEX IF NOT EXISTS idx_functii_decl
    ON ani.functii (declaration_id);

-- Lookup by resolved institution CUI (only rows that have one).
CREATE INDEX IF NOT EXISTS idx_functii_inst_cui
    ON ani.functii (institution_cui)
    WHERE institution_cui IS NOT NULL;

-- Trigram index on the institution name.
CREATE INDEX IF NOT EXISTS idx_functii_inst_name_trgm
    ON ani.functii USING gin (institution_name gin_trgm_ops);
|
||||
|
||||
|
||||
-- ── ani.donatii ────────────────────────────────────────────────────────────
|
||||
-- Section V (donații primite).
|
||||
-- Section V: donations received. Rows are deleted together with their
-- declaration.
CREATE TABLE IF NOT EXISTS ani.donatii (
    id             BIGSERIAL PRIMARY KEY,
    declaration_id BIGINT NOT NULL
        REFERENCES ani.declaratii (id) ON DELETE CASCADE,
    donor_name     TEXT,
    donation_type  TEXT
        CHECK (
            donation_type IN ('bani', 'imobil', 'mobil', 'servicii', 'altele')
            OR donation_type IS NULL
        ),
    value_lei      NUMERIC,
    currency       TEXT DEFAULT 'RON',
    year_received  SMALLINT,
    raw_row_text   TEXT
);

CREATE INDEX IF NOT EXISTS idx_donatii_decl
    ON ani.donatii (declaration_id);
|
||||
|
||||
|
||||
-- ── Comments ──────────────────────────────────────────────────────────────
|
||||
-- Catalog descriptions for the ani schema and its key objects.
COMMENT ON SCHEMA ani
    IS 'ANI declarații de avere și interese. Sources: declaratii.integritate.eu + old-declaratii.integritate.eu. Public by Law 176/2010.';

COMMENT ON TABLE ani.declaratii
    IS 'One row per PDF declaration. official_id resolved in Stage 4 dedup.';

COMMENT ON TABLE ani.shareholdings
    IS 'Section IX firme deținute. THE flagship cross-reference: firm_cui joins to firms.entities → seap.announcements.';

COMMENT ON COLUMN ani.declaratii.pdf_path
    IS 'Relative path under /opt/vreaudigital-data/ani/. Full path: /opt/vreaudigital-data/ani/$pdf_path';
|
||||
@@ -0,0 +1,197 @@
|
||||
-- 031_cnas.sql
|
||||
-- CNAS — Casa Națională de Asigurări de Sănătate.
|
||||
-- Lista furnizorilor de servicii medicale aflați în relație contractuală cu CAS-urile județene.
|
||||
--
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- Source landscape (investigated 2026-05-10):
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- The CNAS source ecosystem is in active migration. There are 3 layers:
|
||||
--
|
||||
-- 1. cnas.ro/wp-content/uploads/ — central WordPress media library, 4.18K
|
||||
-- files total. ~70-90 are "furnizori" PDFs (national + per-județ snapshots).
|
||||
-- Discoverable via /wp-json/wp/v2/media REST API. 99% PDF, ~5 XLSX.
|
||||
-- → THIS IS THE PRIMARY SOURCE for v1.
|
||||
--
|
||||
-- 2. cas.cnas.ro/casXX — new Angular SPA per județ (42 sub-instances). The
|
||||
-- backend admin/api is a Blazor app at /admin/api/* with X-Instance-Key
|
||||
-- header routing. As of 2026-05, most endpoints return [] or 500. The data
|
||||
-- hasn't been migrated to the new infra yet.
|
||||
-- → DEFERRED — track via watch script, ingest when populated.
|
||||
--
|
||||
-- 3. www.cnas.ro/casXX/page/lista-furnizori-*.html — old pre-migration WP.
|
||||
-- 301-redirects to cnas.ro/casXX/ which is now a stub. Effectively dead
|
||||
-- for our purposes; some files survive in CKEditor uploads on the old
|
||||
-- cas.cnas.ro/casXX/theme/cnas/js/ckeditor/filemanager/userfiles/...
|
||||
-- → DEFERRED — best harvested via Wayback CDX.
|
||||
--
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- File naming convention (cnas.ro/wp-content/uploads/):
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- Most files DON'T contain CUI codes. They contain provider names + sediu +
|
||||
-- contact data. CUI matching is post-ingest via firms.normalize_company_name
|
||||
-- + trgm fuzzy search (mirroring anre.licente pattern).
|
||||
--
|
||||
-- Filename signals tip_serviciu:
|
||||
-- FURNIZORI-SPITALE-IN-CONTRACT-2024.pdf → tip='spital'
|
||||
-- FURNIZORI-IN-CONTRACT-MEDICINA-DE-FAMILIE.. → tip='medicina_familie'
|
||||
-- FURNIZORI-DE-SERVICII-FARMACEUTICE-.. → tip='farmacie'
|
||||
-- FURNIZORI-DISPOZITIVE-MEDICALE-.. → tip='dispozitive_medicale'
|
||||
-- FURNIZORI-MEDICINA-DENTARA-.. → tip='medicina_dentara'
|
||||
-- FURNIZORI-INGRIJIRI-MEDICALE-.. → tip='ingrijiri_medicale'
|
||||
-- FURNIZORI-INGRIJIRI-PALIATIVE-.. → tip='ingrijiri_paliative'
|
||||
-- FURNIZORI-RECUPERARE-MEDICALA-.. → tip='recuperare_medicala'
|
||||
-- FURNIZORI-PNS-.. → tip='pns' (programe nationale)
|
||||
-- FURNIZORI-IN-CONTRACT-AMBULATORIU-CLINIC-.. → tip='ambulatoriu_clinic'
|
||||
-- FURNIZORI-IN-CONTRACT-AMBULATORIU-PARACLIN.. → tip='paraclinic'
|
||||
-- FURNIZORI-..-URGENTA-PRESPITALICEASCA-.. → tip='urgenta_transport'
|
||||
-- Lista-furnizori-clinice-.. → tip='clinic'
|
||||
-- Lista-furnizori-RECA-.. → tip='recuperare_a'
|
||||
-- Lista-furnizori-radioterapie-.. → tip='radioterapie'
|
||||
-- Lista-furnizori-testare-genetica-.. → tip='testare_genetica'
|
||||
-- Lista-furnizori-tumori-solide-maligne-.. → tip='oncologie'
|
||||
--
|
||||
-- Județ extraction is heuristic: from filename (e.g. CAS-GORJ, CAS-ARAD) OR
|
||||
-- from PDF page header ("CASA DE ASIGURARI A JUDETULUI GORJ"). When both fail,
|
||||
-- it's a national list (rare — most centrally-uploaded files are actually
|
||||
-- per-județ, as the PDFs are produced by individual CAS-uri).
|
||||
--
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- Cross-source value:
|
||||
-- ───────────────────────────────────────────────────────────────────────────
|
||||
-- 1. cnas.furnizori.cui (resolved post-ingest) × seap.announcements.supplier_cui
|
||||
-- @ CPV 33.* (medical equipment) / 85.* (medical services) =
|
||||
-- "medical providers winning state contracts directly + via insurance"
|
||||
-- 2. cnas.furnizori.cui × anaf_datornici.datornic.cui = "spitale & clinici cu
|
||||
-- datorii la stat" — red-flag pattern.
|
||||
-- 3. cnas.furnizori.cui × fonduri.proiect.beneficiar_cui (POIM-Sănătate) =
|
||||
-- EU-funded healthcare providers.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS cnas;
|
||||
|
||||
-- ── 1. Documents (file metadata catalog) ───────────────────────────────────
|
||||
-- Tracks every PDF/XLSX harvested from cnas.ro WP media. One row per file URL.
|
||||
-- Idempotent re-fetch: same URL → UPDATE fetched_at + parsed_at.
|
||||
-- Catalog of harvested CNAS files, one row per source URL (source_url is unique).
CREATE TABLE IF NOT EXISTS cnas.documents (
    id              BIGSERIAL PRIMARY KEY,
    source_url      TEXT NOT NULL UNIQUE,
    source          TEXT NOT NULL,                    -- 'wp-media' | 'cas-cnas-spa' | 'wayback' | 'manual'
    wp_media_id     BIGINT,                           -- /wp-json/wp/v2/media id (when source = 'wp-media')
    title           TEXT,
    filename        TEXT,
    mime_type       TEXT,
    file_size_bytes INTEGER,
    file_sha1       CHAR(40),                         -- hash of the downloaded bytes, used to detect changes
    published_at    TIMESTAMPTZ,                      -- WordPress "date" (publication on the CNAS site)

    -- Classification inferred heuristically from filename and content.
    tip_serviciu    TEXT,                             -- 'spital' / 'farmacie' / 'medicina_familie' / etc.
    judet           TEXT,                             -- e.g. "GORJ" / "BIHOR"; NULL when national or unknown
    perioada        TEXT,                             -- raw period from the filename ("01.07.2024", "decembrie-2024")
    page_count      INTEGER,                          -- PDFs only, set after parsing

    parse_status    TEXT NOT NULL DEFAULT 'pending',  -- 'pending' / 'ok' / 'failed' / 'unsupported_format' / 'no_table'
    parse_error     TEXT,
    rows_extracted  INTEGER NOT NULL DEFAULT 0,
    fetched_at      TIMESTAMPTZ NOT NULL DEFAULT now(),
    parsed_at       TIMESTAMPTZ
);

CREATE INDEX IF NOT EXISTS idx_cnas_documents_tip
    ON cnas.documents (tip_serviciu);

CREATE INDEX IF NOT EXISTS idx_cnas_documents_judet
    ON cnas.documents (judet);

-- Newest publications first.
CREATE INDEX IF NOT EXISTS idx_cnas_documents_published
    ON cnas.documents (published_at DESC);

CREATE INDEX IF NOT EXISTS idx_cnas_documents_parse_status
    ON cnas.documents (parse_status);

COMMENT ON TABLE cnas.documents
    IS 'Catalog of every CNAS provider-list document (PDF/XLSX) harvested. One row per source URL. Source: cnas.ro/wp-json/wp/v2/media + future SPA endpoints.';

COMMENT ON COLUMN cnas.documents.tip_serviciu
    IS 'Service category inferred from filename: spital, farmacie, medicina_familie, medicina_dentara, dispozitive_medicale, ambulatoriu_clinic, paraclinic, ingrijiri_medicale, ingrijiri_paliative, recuperare_medicala, urgenta_transport, pns, clinic, oncologie, testare_genetica, radioterapie, other.';
|
||||
|
||||
-- ── 2. Furnizori (extracted provider records) ──────────────────────────────
|
||||
-- One row per (document, NR_CRT, name). Provider name is the fundamental key — CUI
|
||||
-- is resolved POST-INSERT via firms.normalize_company_name fuzzy match (mirror
|
||||
-- of anre.licente pattern).
|
||||
--
|
||||
-- We allow the same legal entity (same CUI) to appear MULTIPLE times across
|
||||
-- documents (e.g. same hospital listed in spitale + paraclinic + clinic lists).
|
||||
-- Dedup is via the cnas.mv_furnizori_per_cui rollup, not at insert time.
|
||||
-- Provider records extracted from CNAS documents. Every row belongs to one
-- source document and is deleted together with it (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS cnas.furnizori (
    id bigserial PRIMARY KEY,
    document_id bigint NOT NULL
        REFERENCES cnas.documents (id) ON DELETE CASCADE,

    -- Document context, denormalized for fast filtering.
    -- tip_serviciu: inherited from the document.
    tip_serviciu text,
    -- judet: inherited from the document (or row-level when available).
    judet text,
    -- perioada: e.g. "la 01.07.2024".
    perioada text,

    -- Source row data.
    -- nr_crt: in-document index (1..N).
    nr_crt integer,
    -- cod_furnizor: CAS-internal supplier code such as "BH01" / "CT12" (when present).
    cod_furnizor text,
    -- name: raw from the document ("Spitalul Clinic Județean Oradea", "DR.HEIM HERMINA").
    name text NOT NULL,
    -- name_norm: firms.normalize_company_name output, populated post-insert.
    name_norm text,
    -- reprezentant: legal representative / cabinet doctor (family medicine, dental).
    reprezentant text,
    -- sediu: street address.
    sediu text,
    localitate text,
    telefon text,
    fax text,
    email text,
    -- specialitate: used for ambulatoriu, paraclinic, clinic lists.
    specialitate text,

    -- CUI matching, resolved after ingest.
    cui text,
    cui_match_score numeric(4,3),
    -- cui_match_method: 'exact_norm' / 'trgm_unique' / 'trgm_judet' / 'manual'.
    cui_match_method text,
    matched_at timestamptz,

    -- raw_text: the raw text row from the PDF, kept for debugging.
    raw_text text,
    fetched_at timestamptz NOT NULL DEFAULT now(),

    -- NOTE(review): nr_crt is nullable and a plain UNIQUE treats NULLs as
    -- distinct, so rows without nr_crt are not deduplicated by this
    -- constraint — confirm that is intended.
    UNIQUE (document_id, nr_crt, name)
);
|
||||
|
||||
-- Access paths on cnas.furnizori. The cui and localitate indexes are partial
-- (non-NULL rows only); name_norm gets a trigram GIN index (gin_trgm_ops).
CREATE INDEX IF NOT EXISTS idx_cnas_furn_cui
    ON cnas.furnizori (cui)
    WHERE cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnas_furn_judet_tip
    ON cnas.furnizori (judet, tip_serviciu);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_name_trgm
    ON cnas.furnizori USING gin (name_norm gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_document
    ON cnas.furnizori (document_id);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_localitate
    ON cnas.furnizori (localitate)
    WHERE localitate IS NOT NULL;

-- In-database documentation for the table and its two identifier columns.
COMMENT ON TABLE cnas.furnizori IS
    'Furnizori de servicii medicale extrași din PDF-urile CNAS. One row per (document, nr_crt, name). CUI resolved post-insert via firms.normalize_company_name + trgm fuzzy.';
COMMENT ON COLUMN cnas.furnizori.cod_furnizor IS
    'Internal CAS supplier code, e.g. "BH01" (Bihor seq 01), "MB427" (București-Ilfov seq 427). NOT a CUI.';
COMMENT ON COLUMN cnas.furnizori.cui IS
    'Resolved CUI via firms.normalize_company_name + pg_trgm match. NULL = unmatched; cabinete medicale individuale (CMI) often have no CUI in firms registry.';
|
||||
|
||||
-- ── 3. Scrape log ─────────────────────────────────────────────────────────
|
||||
-- Mirrors anre.scrape_log convention.
|
||||
-- One row per scraper run: which scraper ran, row counters, timing and an
-- optional error message.
CREATE TABLE IF NOT EXISTS cnas.scrape_log (
    id bigserial PRIMARY KEY,
    -- scraper: 'wp-media-list' / 'parse-pdf' / 'match-cui' / 'cas-cnas-spa'.
    scraper text NOT NULL,
    source_url text,
    rows_seen integer NOT NULL DEFAULT 0,
    rows_inserted integer NOT NULL DEFAULT 0,
    rows_updated integer NOT NULL DEFAULT 0,
    rows_skipped integer NOT NULL DEFAULT 0,
    duration_ms integer NOT NULL DEFAULT 0,
    -- started_at has no default and must be supplied by the writer;
    -- finished_at defaults to the insert time.
    started_at timestamptz NOT NULL,
    finished_at timestamptz NOT NULL DEFAULT now(),
    error text
);

-- Newest-first listing of runs.
CREATE INDEX IF NOT EXISTS idx_cnas_scrape_log_started
    ON cnas.scrape_log (started_at DESC);
|
||||
|
||||
-- ── 4. Materialized view: per-CUI provider rollup ─────────────────────────
|
||||
-- Joinable with seap.announcements + anaf_datornici + fonduri.proiect for
|
||||
-- cross-source detection. Refreshed via cron after match-cui pass.
|
||||
-- Per-CUI rollup of cnas.furnizori; only rows with a resolved CUI contribute.
CREATE MATERIALIZED VIEW IF NOT EXISTS cnas.mv_furnizori_per_cui AS
SELECT
    cui,
    -- Number of appearances across all lists.
    COUNT(*) AS nr_aparitii,
    COUNT(DISTINCT tip_serviciu) FILTER (WHERE tip_serviciu IS NOT NULL) AS nr_tipuri_serviciu,
    COUNT(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS nr_judete,
    -- FILTER keeps NULL out of the arrays (array_agg would otherwise include it).
    array_agg(DISTINCT tip_serviciu) FILTER (WHERE tip_serviciu IS NOT NULL) AS tipuri_serviciu,
    array_agg(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS judete,
    -- One representative name: the shortest spelling. Ties on length are
    -- broken by the name itself so the sample stays the same across refreshes.
    (array_agg(name ORDER BY length(name), name))[1] AS name_sample,
    MIN(fetched_at) AS first_seen,
    MAX(fetched_at) AS last_seen
FROM cnas.furnizori
WHERE cui IS NOT NULL
GROUP BY cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnas_mv_per_cui ON cnas.mv_furnizori_per_cui(cui);

COMMENT ON MATERIALIZED VIEW cnas.mv_furnizori_per_cui IS
    'Per-CUI rollup of CNAS provider appearances. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnas.mv_furnizori_per_cui;';
|
||||
@@ -0,0 +1,118 @@
|
||||
-- 032_aaas.sql
|
||||
-- AAAS — Autoritatea pentru Administrarea Activelor Statului.
|
||||
-- Manages the state's residual ownership in privatized firms + collects
|
||||
-- post-privatization debts. Tagging firms with "state still owns" /
|
||||
-- "owes state money" / "post-priv investment obligation" is rare and powerful.
|
||||
--
|
||||
-- Sources investigated 2026-05-10:
|
||||
-- 1. https://www.aaas.gov.ro/despre-aaas/1-9-guvernanta-corporativa-aaas/
|
||||
-- 1-9-3-companii-sub-autoritatea-aaas/
|
||||
-- → 12 named active-portfolio companies; each has a clean subpage with
|
||||
-- CUI, J-number, address, phone, web, email, AAAS share %.
|
||||
-- 2. https://www.aaas.gov.ro/4-oferta-a-a-a-s/4-2-vanzari-actiuni/
|
||||
-- → "SECȚIUNE ÎN CONSTRUCȚIE" — only EXPO PARC SRL Iași as PDF teaser.
|
||||
-- 3. https://www.aaas.gov.ro/4-oferta-a-a-a-s/4-3-valorificare-creante/
|
||||
-- → "SECȚIUNE ÎN CONSTRUCȚIE" — debt list not published structured online.
|
||||
-- 4. https://gwp.aaas.gov.ro/Directia-creante
|
||||
-- → Login-gated services portal; no anonymous CUI/debtor lookup.
|
||||
--
|
||||
-- This schema is intentionally narrow: 12-15 confirmed CUIs is small but
|
||||
-- HIGH SIGNAL — every firms profile that joins back here gets "STAT DEȚINE
|
||||
-- ACȚIUNI" tag. Future passes can ingest historical lists (e.g. ORDIN
|
||||
-- 278/2005 PDF — 800+ commercial companies × 41 counties as legacy snapshot).
|
||||
--
|
||||
-- Cross-source value:
|
||||
-- aaas.firme.cui × seap.announcements.supplier_cui
|
||||
-- = "Companies in active state portfolio winning more state contracts"
|
||||
-- aaas.firme.cui × anaf.datornici
|
||||
-- = "State-owned company that itself owes the state money"
|
||||
-- aaas.firme.cui × firms.financials
|
||||
-- = "How is the residual state-owned portfolio actually performing?"
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS aaas;

-- ── 1. Companies under AAAS authority / monitored by AAAS ──────────────────
-- One row per company with the CUI as primary key. aaas_status records the
-- kind of relationship with AAAS. Per the original design note, scraper
-- re-runs upsert on cui because last_action / state_share_pct can change.
-- The AAAS subpage each row came from is kept in source_url for traceability.
CREATE TABLE IF NOT EXISTS aaas.firme (
    cui text PRIMARY KEY,
    -- name: raw name as published by AAAS.
    name text NOT NULL,
    -- name_normalized: firms.normalize_company_name(name).
    name_normalized text,
    -- reg_number: trade-register number, e.g. "J40/8215/2020".
    reg_number text,
    -- aaas_status: 'active_holding' | 'post_priv_debt' | 'insolventa' |
    -- 'recuperare' | 'vanzare_actiuni' | 'vanzare_creante'.
    aaas_status text NOT NULL,
    -- state_share_pct: from the "Participatie AAAS: 100%" line.
    state_share_pct numeric(6,3),
    -- debt_to_state_lei: only when AAAS publishes a structured amount.
    debt_to_state_lei numeric(20,2),
    -- last_action: short free-form description of the last AAAS action.
    last_action text,
    last_action_date date,
    -- address: the "Adresa: ..." line.
    address text,
    phone text,
    email text,
    website text,
    -- source_url: the specific AAAS subpage.
    source_url text NOT NULL,
    notes text,
    -- raw: full parsed key/value bag.
    raw jsonb,
    -- cui_match_score: set only for fuzzy matches (NULL if AAAS itself published the CUI).
    cui_match_score numeric(4,3),
    -- cui_match_method: 'aaas_published' | 'exact_norm' | 'trgm_unique'.
    cui_match_method text,
    fetched_at timestamptz NOT NULL DEFAULT now()
);
|
||||
|
||||
-- Access paths on aaas.firme: status filter, largest-debt and largest-share
-- orderings (NULLs sorted last), and a trigram GIN index on the normalized name.
CREATE INDEX IF NOT EXISTS idx_aaas_firme_status
    ON aaas.firme (aaas_status);
CREATE INDEX IF NOT EXISTS idx_aaas_firme_debt
    ON aaas.firme (debt_to_state_lei DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_aaas_firme_share_pct
    ON aaas.firme (state_share_pct DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_aaas_firme_name_trgm
    ON aaas.firme USING gin (name_normalized gin_trgm_ops);

-- In-database documentation. Adjacent string literals on separate lines are
-- concatenated by PostgreSQL into a single comment string.
COMMENT ON TABLE aaas.firme IS
    'Firme aflate sub autoritatea AAAS / monitorizate de AAAS. PK = CUI. '
    'Sursă primară: aaas.gov.ro subpages 1.9.3 + 4.2 + 4.3 (HTML scrape).';
COMMENT ON COLUMN aaas.firme.aaas_status IS
    'active_holding = AAAS deține pachet de acțiuni; '
    'post_priv_debt = obligații post-privatizare neîndeplinite; '
    'insolventa = în procedură de insolvență administrată de AAAS; '
    'recuperare = creanță în recuperare; '
    'vanzare_actiuni = ofertă de vânzare acțiuni publicată; '
    'vanzare_creante = ofertă de vânzare creanță publicată.';
COMMENT ON COLUMN aaas.firme.state_share_pct IS
    'Procent de participație AAAS în acționariat. 100 = stat unic acționar.';
COMMENT ON COLUMN aaas.firme.cui_match_method IS
    'aaas_published = CUI publicat direct de AAAS (autoritativ); '
    'exact_norm = match exact pe firms.normalize_company_name; '
    'trgm_unique = match trigram unic peste 0.85.';
|
||||
|
||||
-- ── 2. Scrape log (mirrors anre.scrape_log convention) ────────────────────
|
||||
-- One row per AAAS scraper run: source URL, row counters, timing and an
-- optional error message.
CREATE TABLE IF NOT EXISTS aaas.scrape_log (
    id bigserial PRIMARY KEY,
    -- scraper: 'portfolio' | 'vanzari_actiuni' | 'vanzari_creante'.
    scraper text NOT NULL,
    source_url text NOT NULL,
    rows_seen integer NOT NULL DEFAULT 0,
    rows_inserted integer NOT NULL DEFAULT 0,
    rows_updated integer NOT NULL DEFAULT 0,
    rows_skipped integer NOT NULL DEFAULT 0,
    duration_ms integer NOT NULL DEFAULT 0,
    -- started_at has no default and must be supplied by the writer;
    -- finished_at defaults to the insert time.
    started_at timestamptz NOT NULL,
    finished_at timestamptz NOT NULL DEFAULT now(),
    error text
);

-- Newest-first listing of runs.
CREATE INDEX IF NOT EXISTS idx_aaas_scrape_log_started
    ON aaas.scrape_log (started_at DESC);
|
||||
|
||||
-- ── 3. Materialized view: per-CUI rollup for joinability ──────────────────
|
||||
-- Trivial today (1 row per cui), but the MV pattern is consistent with
|
||||
-- anre.mv_licente_per_cui / regas / etc. and keeps the join API uniform
|
||||
-- when more AAAS sources land. Refresh: REFRESH MATERIALIZED VIEW
|
||||
-- CONCURRENTLY aaas.mv_per_cui;
|
||||
-- Per-CUI rollup of aaas.firme. Because cui is the table's primary key, each
-- group currently holds exactly one source row.
CREATE MATERIALIZED VIEW IF NOT EXISTS aaas.mv_per_cui AS
SELECT
    f.cui,
    array_agg(DISTINCT f.aaas_status) AS statusuri,
    MAX(f.state_share_pct) AS max_state_share_pct,
    -- SUM yields NULL (not 0) when no debt amount is recorded for the CUI.
    SUM(f.debt_to_state_lei) AS total_debt_to_state_lei,
    MAX(f.fetched_at) AS last_seen_at,
    COUNT(*) AS rows_count
FROM aaas.firme AS f
GROUP BY f.cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_aaas_mv_per_cui
    ON aaas.mv_per_cui (cui);

COMMENT ON MATERIALIZED VIEW aaas.mv_per_cui IS
    'Rollup AAAS per CUI. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY aaas.mv_per_cui.';
|
||||
@@ -0,0 +1,202 @@
|
||||
-- 033_cnsc.sql
|
||||
-- CNSC — Consiliul Național de Soluționare a Contestațiilor.
|
||||
-- Independent administrative-jurisdictional body that resolves
|
||||
-- contestations against SEAP procurement procedures (Law 101/2016).
|
||||
--
|
||||
-- Source investigated 2026-05-10:
|
||||
-- http://portal.cnsc.ro/decizii.html
|
||||
-- → ASP.NET WebForms front, but data is loaded async via
|
||||
-- POST http://portal.cnsc.ro/Default.aspx/CallWebMethod
|
||||
-- payload: {"sender":"67fb8141-f456-4276-b51a-baca731f92ca",
|
||||
-- "methodName":"get",
|
||||
-- "senderParams":"a=search&reg:registrationDate=-&page=N",
|
||||
-- "isBuletin":"0"}
|
||||
-- Header: Referer must match the page query — server reads page from it.
|
||||
-- Cookies: ASP.NET_SessionId required; obtained by GET /decizii.html first.
|
||||
-- → Returns JSON {"d":"<html>...</html>"} with a <table> inside.
|
||||
-- → 50 rows per page × 617 pages ≈ 30,850 decisions.
|
||||
-- → Page 1 has 2026 decisions; last page (617) lands in 2024.
|
||||
-- Sort order is by decision number DESC (panel-grouped).
|
||||
--
|
||||
-- Column layout in the listing (already structured — no PDF parse needed
|
||||
-- to get 80% of the value):
|
||||
-- 1. Numar decizie → decision_no
|
||||
-- 2. Nume Contestator → contestator_name (sometimes multiple)
|
||||
-- 3. Nr. Inregistrare CNSC → registration_no_cnsc
|
||||
-- 4. Denumire Autoritate → authority_name
|
||||
-- 5. CUI Contestator → contestator_cui ← linkable to firms.entities
|
||||
-- 6. CUI Autoritate Contractantă → authority_cui ← linkable to seap.announcements.authority_cui
|
||||
-- 7. An → year
|
||||
-- 8. Dată Inregistrare → registration_date
|
||||
-- 9. Download → pdf_docuid (b64 docUID for sivadoc/download.aspx)
|
||||
--
|
||||
-- IMPORTANT: the listing does NOT include the SEAP procedure_ref (CN######) —
|
||||
-- that lives only inside the PDF text. Stage 2 (PDF text extraction with
|
||||
-- pdftotext + regex for "CN[0-9]{6,}|SCN[0-9]+|ADV[0-9]+") can recover the
|
||||
-- SEAP ref for ~80% of decisions; estimate 15-25h to design+QA the parser
|
||||
-- across the full 30K corpus.
|
||||
--
|
||||
-- Decision_type is also PDF-only: the listing shows when the contestation
|
||||
-- was REGISTERED, not the outcome. PDF parsing is required to recover
|
||||
-- 'admis' / 'respins' / 'admis în parte' / 'redirecționat' / 'arhivat'.
|
||||
--
|
||||
-- Cross-source value (live, even at Stage 1):
|
||||
-- cnsc.decizii.authority_cui × seap.announcements.authority_cui
|
||||
-- = "Authorities with most contestations filed against them" (procedural risk score)
|
||||
-- cnsc.decizii.contestator_cui × seap.announcements.supplier_cui
|
||||
-- = "Suppliers most active in contesting losses" (litigious-bidder profile)
|
||||
-- cnsc.decizii.contestator_cui × firms.entities + financials
|
||||
-- = "Who challenges the most? Are they real bidders or vexatious filers?"
|
||||
--
|
||||
-- After Stage 2 PDF parse:
|
||||
-- cnsc.decizii × seap.announcements ON seap_procedure_ref = ref_number
|
||||
-- = full contestation lifecycle: tender → contestation → CNSC outcome → award
|
||||
-- GROUP BY authority_cui WHERE decision_type='admis' / total
|
||||
-- = "Authorities most likely to lose at CNSC" — strong signal of vicious
|
||||
-- procedure design. THIS IS THE KILLER QUERY.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS cnsc;

-- ── 1. Decizii — one row per CNSC decision ─────────────────────────────────
-- The primary key is the surrogate id. The natural key (decision_no,
-- decision_year) is enforced separately by the UNIQUE constraint
-- cnsc_decizii_pk_natural: CNSC numbering resets every year, so
-- (1234, 2024) ≠ (1234, 2025). docUID is unique per document, but not every
-- row in old data has one, so it is not used as a key.
CREATE TABLE IF NOT EXISTS cnsc.decizii (
    id bigserial PRIMARY KEY,
    -- decision_no: e.g. 1445.
    decision_no integer NOT NULL,
    -- decision_year: e.g. 2026.
    decision_year smallint NOT NULL,
    -- registration_no_cnsc: "10549, 20389, 20395" — comma-separated when multiple.
    registration_no_cnsc text,
    -- registration_date: source format 17.02.2026, stored as 2026-02-17.
    registration_date date,

    -- Contestator: the bidder who filed the complaint.
    -- contestator_name_raw: e.g. "RAC CONSTRUCT MORENI SRL , RAC CONSTRUCT MORENI SRL, URBIO DOWNSTREAM SRL".
    contestator_name_raw text,
    -- contestator_names: the raw value split and trimmed.
    contestator_names text[],
    -- contestator_cui_raw: "RO18035010" or "RO18035010;4663448" or empty.
    contestator_cui_raw text,
    -- contestator_cuis: normalized to digits only, duplicates collapsed.
    contestator_cuis text[],

    -- Contracting authority: the public buyer being contested.
    authority_name text,
    -- authority_cui_raw: "4495140;" or "16054368" or empty.
    authority_cui_raw text,
    -- authority_cuis: normalized to digits only, duplicates collapsed.
    authority_cuis text[],

    -- PDF reference; the download URL is built from docuid_b64 + filename_b64.
    -- pdf_filename: e.g. "Decizie_1445.pdf".
    pdf_filename text,
    -- pdf_docuid_b64: e.g. "Mzg4NThkZGQtY2JkMS00ZDg3LTlhY2UtY2ZlMTBlYzAwM2Y0".
    pdf_docuid_b64 text,
    -- pdf_url: materialized full http://portal.cnsc.ro/sivadoc/download.aspx?... URL.
    pdf_url text,

    -- Stage-2 (PDF parse) fields: NULL until the pdftotext + regex pass runs.
    -- Kept in this table so the schema stays single-table.
    -- seap_procedure_ref: "CN1234567" / "ADV……" / "SCN……" — joinable to seap.announcements.ref_number.
    seap_procedure_ref text,
    -- decision_type: 'admis' | 'admis_in_parte' | 'respins' | 'redirectionat' | 'arhivat' | 'fond' | NULL.
    decision_type text,
    -- decision_date: date the decision was issued (differs from registration_date).
    decision_date date,
    -- decision_summary: short extracted summary.
    decision_summary text,
    -- pdf_text_sha1: sha1 of the pdftotext output, for idempotent re-parse.
    pdf_text_sha1 text,
    pdf_parsed_at timestamptz,

    -- Provenance.
    -- source_page: listing page the row was found on (debug aid).
    source_page integer,
    fetched_at timestamptz NOT NULL DEFAULT now(),

    CONSTRAINT cnsc_decizii_pk_natural UNIQUE (decision_no, decision_year)
);
|
||||
|
||||
-- Access paths on cnsc.decizii. The two CUI arrays get GIN indexes; the
-- Stage-2 columns get partial indexes that skip rows still NULL.
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_authority_cuis ON cnsc.decizii USING gin (authority_cuis);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_contestator_cuis ON cnsc.decizii USING gin (contestator_cuis);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_seap_ref ON cnsc.decizii (seap_procedure_ref) WHERE seap_procedure_ref IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_decision_type ON cnsc.decizii (decision_type) WHERE decision_type IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_year ON cnsc.decizii (decision_year DESC);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_reg_date ON cnsc.decizii (registration_date DESC NULLS LAST);

-- In-database documentation. Adjacent string literals on separate lines are
-- concatenated by PostgreSQL into a single comment string.
COMMENT ON TABLE cnsc.decizii IS
    'Decizii CNSC — contestații pe proceduri SEAP. PK natural (decision_no, decision_year). '
    'Stage 1: scrape listing din portal.cnsc.ro/decizii.html (50/page × 617 pages ≈ 30K rows). '
    'Stage 2 (TODO): pdftotext pe PDF-ul referit → seap_procedure_ref + decision_type.';
COMMENT ON COLUMN cnsc.decizii.decision_no IS
    'Numărul deciziei CNSC. Resetat anual — întotdeauna unic doar împreună cu decision_year.';
COMMENT ON COLUMN cnsc.decizii.registration_no_cnsc IS
    'Numărul/numerele de înregistrare a contestației la CNSC. Poate fi listă comma-separată '
    'când o decizie soluționează mai multe contestații (ex. "10549, 20389, 20395").';
COMMENT ON COLUMN cnsc.decizii.contestator_cuis IS
    'CUIs cifre-only ale contestatorilor (mai mulți când o asociere atacă). '
    'Joinabil cu firms.entities.cui sau seap.announcements.supplier_cui.';
COMMENT ON COLUMN cnsc.decizii.authority_cuis IS
    'CUIs cifre-only ale autorităților contractante. Joinabil cu seap.announcements.authority_cui.';
COMMENT ON COLUMN cnsc.decizii.pdf_url IS
    'URL complet sivadoc/download.aspx?docUID=…&filename=…&action=inline (b64 in query).';
COMMENT ON COLUMN cnsc.decizii.seap_procedure_ref IS
    'Referința procedurii SEAP extrasă din PDF (CN/SCN/ADV/RFQ + cifre). NULL până la Stage-2 PDF-parse.';
COMMENT ON COLUMN cnsc.decizii.decision_type IS
    'Tipul deciziei extras din textul PDF: admis | admis_in_parte | respins | redirectionat | arhivat | fond. NULL până la Stage-2.';
|
||||
|
||||
-- ── 2. Scrape log (mirrors anre/aaas convention) ──────────────────────────
|
||||
-- One row per CNSC scraper run: listing page range covered, row counters,
-- timing and an optional error message.
CREATE TABLE IF NOT EXISTS cnsc.scrape_log (
    id bigserial PRIMARY KEY,
    -- scraper: 'listing' | 'pdf_parse'.
    scraper text NOT NULL,
    page_from integer,
    page_to integer,
    rows_seen integer NOT NULL DEFAULT 0,
    rows_inserted integer NOT NULL DEFAULT 0,
    rows_updated integer NOT NULL DEFAULT 0,
    rows_skipped integer NOT NULL DEFAULT 0,
    duration_ms integer NOT NULL DEFAULT 0,
    -- started_at has no default and must be supplied by the writer;
    -- finished_at defaults to the insert time.
    started_at timestamptz NOT NULL,
    finished_at timestamptz NOT NULL DEFAULT now(),
    error text
);

-- Newest-first listing of runs.
CREATE INDEX IF NOT EXISTS idx_cnsc_scrape_log_started
    ON cnsc.scrape_log (started_at DESC);
|
||||
|
||||
-- ── 3. Materialized view: per-CUI rollup (authority side) ─────────────────
|
||||
-- Used by the killer query "authorities most likely to lose at CNSC".
|
||||
-- decision_type rollup is meaningful only after Stage-2 PDF parse is done;
|
||||
-- until then admis_count / respins_count are 0 and contestation_count is
|
||||
-- the useful field.
|
||||
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
|
||||
-- Per-authority rollup of cnsc.decizii. Each decision contributes one row per
-- element of its authority_cuis array; NULL and empty-string elements are
-- discarded. The decision_type counters stay at 0 while decision_type is NULL.
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_authority_cui AS
SELECT
    u.authority_cui AS cui,
    COUNT(*) AS contestation_count,
    COUNT(*) FILTER (WHERE d.decision_type = 'admis') AS admis_count,
    COUNT(*) FILTER (WHERE d.decision_type = 'admis_in_parte') AS admis_in_parte_count,
    COUNT(*) FILTER (WHERE d.decision_type = 'respins') AS respins_count,
    COUNT(*) FILTER (WHERE d.decision_type IS NOT NULL) AS resolved_count,
    MIN(d.registration_date) AS first_contestation_date,
    MAX(d.registration_date) AS last_contestation_date
FROM cnsc.decizii AS d
-- Explicit lateral join instead of the implicit comma join: unnest() reads
-- the array of the current decision row.
CROSS JOIN LATERAL unnest(d.authority_cuis) AS u (authority_cui)
WHERE u.authority_cui IS NOT NULL
  AND u.authority_cui <> ''
GROUP BY u.authority_cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_authority_cui
    ON cnsc.mv_per_authority_cui(cui);

COMMENT ON MATERIALIZED VIEW cnsc.mv_per_authority_cui IS
    'Rollup CNSC per autoritate contractantă (CUI). Refresh: '
    'REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui.';
|
||||
|
||||
-- ── 4. Materialized view: per-CUI rollup (contestator side) ───────────────
|
||||
-- Per-contestator rollup of cnsc.decizii. Each decision contributes one row
-- per element of its contestator_cuis array; NULL and empty-string elements
-- are discarded. The outcome counters stay at 0 while decision_type is NULL.
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_contestator_cui AS
SELECT
    u.contestator_cui AS cui,
    COUNT(*) AS contestations_filed,
    COUNT(*) FILTER (WHERE d.decision_type = 'admis') AS won_admis,
    COUNT(*) FILTER (WHERE d.decision_type = 'admis_in_parte') AS won_partial,
    COUNT(*) FILTER (WHERE d.decision_type = 'respins') AS lost_respins,
    COUNT(*) FILTER (WHERE d.decision_type IS NOT NULL) AS resolved_count,
    MIN(d.registration_date) AS first_contestation_date,
    MAX(d.registration_date) AS last_contestation_date
FROM cnsc.decizii AS d
-- Explicit lateral join instead of the implicit comma join: unnest() reads
-- the array of the current decision row.
CROSS JOIN LATERAL unnest(d.contestator_cuis) AS u (contestator_cui)
WHERE u.contestator_cui IS NOT NULL
  AND u.contestator_cui <> ''
GROUP BY u.contestator_cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_contestator_cui
    ON cnsc.mv_per_contestator_cui(cui);

COMMENT ON MATERIALIZED VIEW cnsc.mv_per_contestator_cui IS
    'Rollup CNSC per contestator (CUI). Cine atacă cel mai mult, cu ce rată de succes. '
    'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_contestator_cui.';
|
||||
@@ -0,0 +1,123 @@
|
||||
-- 034_asf.sql
|
||||
-- ASF — Autoritatea de Supraveghere Financiară.
|
||||
-- Public registries of authorized financial entities (insurers, brokers, pension
|
||||
-- funds, asset managers, intermediaries) scraped from data.asfromania.ro.
|
||||
--
|
||||
-- Sources (all return JSON{raspuns:HTML, status:100} via POST cautare):
|
||||
-- 1. /scr/ra/cautare?l=ro (Registrul asigurătorilor + intermediarilor)
|
||||
-- sectiune=1 tipCompanie=0 → Societăți de asigurare - companii active
|
||||
-- sectiune=2 tipCompanie=0 → Societăți de asigurare - companii radiate
|
||||
-- sectiune=1 tipCompanie=1 → Intermediari principali - companii active
|
||||
-- sectiune=2 tipCompanie=1 → Intermediari principali - companii radiate
|
||||
-- Fields per panel: register_no (RA-XXX/RBK-XXX), LEI, CUI, RC code,
|
||||
-- authorization no/date, registration date, radiation date, type, legal form,
|
||||
-- address, phone, fax, observations, authorized classes (general/life),
|
||||
-- executives. Total: ~768 insurers + ~801 brokers ≈ 1.5K entities.
|
||||
--
|
||||
-- 2. /scr/ra/cautare endpoint accepts free-text 'termen' (≥4 chars). Search
|
||||
-- hits denumire, CUI, adresă, județ, classes. NO captcha required when
|
||||
-- 'g-recaptcha-response' field is OMITTED from the POST body. (When sent
|
||||
-- with any non-empty value the server tries to verify and returns
|
||||
-- "Verificare captcha eșuată".)
|
||||
--
|
||||
-- 3. Pension funds + AIFM/UCITS register pages exist on asfromania.ro/ro/a/...
|
||||
-- but most are F5-WAF-protected from non-browser clients. We start with the
|
||||
-- ra portal which has cleanest data; document handoff for additional
|
||||
-- registers in ASF-PLAN.md.
|
||||
--
|
||||
-- Cross-source value: asf.entitati.cui (extracted directly from response, no
|
||||
-- fuzzy match needed) × seap.announcements.supplier_cui = "ASF-licensed firms
|
||||
-- with state contracts". Red-flag: insurance firm wins SEAP contract for state
|
||||
-- insurance services but has been radiated by ASF; broker active in SEAP but
|
||||
-- with suspended/withdrawn ASF authorization.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS asf;

-- ── 1. Authorized entities (insurers, brokers, pension funds, AIFM, UCITS) ──
-- One row per distinct ASF register entry. Each entry carries a register_no
-- (RA-NNN for insurers, RBK-NNN for brokers, etc.) that is unique within its
-- register_type, as enforced by UNIQUE (register_type, register_no).
CREATE TABLE IF NOT EXISTS asf.entitati (
    id bigserial PRIMARY KEY,
    -- register_type: 'asigurator' | 'broker' | 'fond_pensii' | 'aifm' | 'ucits' | 'intermediar_secundar'.
    register_type text NOT NULL,
    -- section_status: 'activ' | 'radiat' (mirrors the source sectiune=1/2 split).
    section_status text NOT NULL,
    -- register_no: e.g. "RA-057", "RBK-123".
    register_no text NOT NULL,
    -- name: raw "Denumire".
    name text NOT NULL,
    -- name_normalized: firms.normalize_company_name(name), for trigram fallback.
    name_normalized text,
    -- cui: "Cod unic de identificare (CUI)".
    cui text,
    -- cod_rc: "Cod unic RC" (e.g. J40/2226/2006).
    cod_rc text,
    -- cod_lei: 20-character LEI.
    cod_lei text,
    -- nr_autorizatie: "Număr autorizație" (e.g. 114.146).
    nr_autorizatie text,
    -- data_autorizare: "Dată autorizare".
    data_autorizare date,
    -- data_inmatriculare: "Dată înmatriculare".
    data_inmatriculare date,
    -- data_radiere: "Dată radiere" (NULL when active).
    data_radiere date,
    -- tip_companie: "Tip companie" (Societate de asigurare / Intermediar principal / etc.).
    tip_companie text,
    -- forma_juridica: "Formă juridică".
    forma_juridica text,
    -- adresa: "Adresă".
    adresa text,
    telefon text,
    fax text,
    email text,
    web text,
    -- observatii: free-text remarks.
    observatii text,
    -- clase_autorizate: {"asigurari_generale":[...], "asigurari_viata":[...]}.
    clase_autorizate jsonb,
    -- conducere: [{"nume":"X","functie":"Y","din":"DD.MM.YYYY"}].
    conducere jsonb,
    -- raw_html: raw panel HTML, kept for traceability.
    raw_html text,
    fetched_at timestamptz NOT NULL DEFAULT now(),

    UNIQUE (register_type, register_no)
);
|
||||
|
||||
-- Access paths on asf.entitati. The cui and data_radiere indexes are partial
-- (non-NULL rows only); name_normalized gets a trigram GIN index.
CREATE INDEX IF NOT EXISTS idx_asf_entitati_cui
    ON asf.entitati (cui)
    WHERE cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_asf_entitati_name_norm_trgm
    ON asf.entitati USING gin (name_normalized gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_asf_entitati_type_status
    ON asf.entitati (register_type, section_status);
CREATE INDEX IF NOT EXISTS idx_asf_entitati_radiere
    ON asf.entitati (data_radiere)
    WHERE data_radiere IS NOT NULL;

-- In-database documentation for the table and its two classifier columns.
COMMENT ON TABLE asf.entitati IS
    'ASF authorized entities — insurers, brokers, pension funds, AIFM/UCITS, intermediaries. Source: data.asfromania.ro/scr/ra (and other registers).';
COMMENT ON COLUMN asf.entitati.register_type IS
    'asigurator (RA-NNN) / broker (RBK-NNN) / fond_pensii / aifm / ucits / intermediar_secundar';
COMMENT ON COLUMN asf.entitati.section_status IS
    'activ / radiat — mirrors source sectiune=1/sectiune=2 split. Active record has data_radiere=NULL.';
|
||||
|
||||
-- ── 2. Scrape log (mirrors anre.scrape_log convention) ──────────────────────
|
||||
-- One row per ASF scraper run: source URL, row counters, timing and an
-- optional error message.
CREATE TABLE IF NOT EXISTS asf.scrape_log (
    id bigserial PRIMARY KEY,
    -- scraper: 'asigurator_activ' / 'asigurator_radiat' / 'broker_activ' / ...
    scraper text NOT NULL,
    source_url text NOT NULL,
    rows_seen integer NOT NULL DEFAULT 0,
    rows_inserted integer NOT NULL DEFAULT 0,
    rows_updated integer NOT NULL DEFAULT 0,
    rows_skipped integer NOT NULL DEFAULT 0,
    duration_ms integer NOT NULL DEFAULT 0,
    -- started_at has no default and must be supplied by the writer;
    -- finished_at defaults to the insert time.
    started_at timestamptz NOT NULL,
    finished_at timestamptz NOT NULL DEFAULT now(),
    error text
);

-- Newest-first listing of runs.
CREATE INDEX IF NOT EXISTS idx_asf_scrape_log_started
    ON asf.scrape_log (started_at DESC);
|
||||
|
||||
-- ── 3. Materialized view: per-CUI ASF rollup ────────────────────────────────
|
||||
-- Joinable with seap.announcements.supplier_cui to detect financial firms
|
||||
-- holding state contracts.
|
||||
-- Per-CUI rollup of asf.entitati; rows without a CUI are excluded. Gives
-- counts per register type and per active/radiated status, the distinct
-- register types and numbers, and the first authorization / last radiation dates.
CREATE MATERIALIZED VIEW IF NOT EXISTS asf.mv_entitati_per_cui AS
SELECT
    e.cui,
    COUNT(*) AS nr_total,
    COUNT(*) FILTER (WHERE e.register_type = 'asigurator') AS nr_asigurator,
    COUNT(*) FILTER (WHERE e.register_type = 'broker') AS nr_broker,
    COUNT(*) FILTER (WHERE e.register_type = 'fond_pensii') AS nr_fond_pensii,
    COUNT(*) FILTER (WHERE e.register_type = 'aifm') AS nr_aifm,
    COUNT(*) FILTER (WHERE e.register_type = 'ucits') AS nr_ucits,
    COUNT(*) FILTER (WHERE e.section_status = 'activ') AS nr_active,
    COUNT(*) FILTER (WHERE e.section_status = 'radiat') AS nr_radiate,
    array_agg(DISTINCT e.register_type) AS register_types,
    array_agg(DISTINCT e.register_no ORDER BY e.register_no) AS register_numbers,
    MIN(e.data_autorizare) AS prima_autorizare,
    MAX(e.data_radiere) AS ultima_radiere
FROM asf.entitati AS e
WHERE e.cui IS NOT NULL
GROUP BY e.cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_asf_mv_entitati_per_cui
    ON asf.mv_entitati_per_cui (cui);

COMMENT ON MATERIALIZED VIEW asf.mv_entitati_per_cui IS
    'Rollup of ASF entities per CUI. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY asf.mv_entitati_per_cui';
|
||||
@@ -0,0 +1,120 @@
|
||||
-- 035_curteacont.sql
|
||||
-- Curtea de Conturi a României — Rapoarte de audit financiar / conformitate /
|
||||
-- performanță / control / follow-up.
|
||||
--
|
||||
-- Source: https://www.curteadeconturi.ro/rapoarte-audit/{category}
|
||||
-- Categories scraped:
|
||||
-- - rapoarte-audit-financiar (~1,890 reports, 127 listing pages × 15)
|
||||
-- - rapoarte-conformitate (~2,580 reports, 173 pages × 15)
|
||||
-- - rapoarte-audit-performanta (~135 reports, 9 pages × 15)
|
||||
-- Approximate total: ~4,600 reports, growing weekly with new audits.
|
||||
--
|
||||
-- Detail page exposes a single PDF download link of the form
|
||||
-- `/rapoarte-audit/downloads/{integer_id}` (verified IDs: 4078, 7335, 7854,
|
||||
-- 10653, 12418, 13832, 14183 — sequential, predictable).
|
||||
--
|
||||
-- Stage 1 (this file + scrape-curteacont.ts):
|
||||
-- - Walks listing pages, harvests slug URLs + titles + publication dates +
|
||||
-- audit_type + audited entity name (parsed from title).
|
||||
-- - DOES NOT fetch detail pages or download PDFs (that is Stage 2 — see
|
||||
-- CURTEACONT-PLAN.md for the 15-25h roadmap).
|
||||
--
|
||||
-- Stage 2 (next session):
|
||||
-- - For each row with NULL pdf_url, fetch detail page → extract
|
||||
-- /downloads/{id} numeric PDF ID + file size.
|
||||
-- - Optionally download PDF to satra disk under /opt/vreaudigital/data/cdc/.
|
||||
-- - Run pdfminer/pdftotext against first 3 pages → extract structured
|
||||
-- summary, findings_count, key amounts.
|
||||
-- - Fuzzy-match audited_entity_name against firms.entities.denumire (lib
|
||||
-- curatat already exists at services/seap-scraper/src/matching/) → fill
|
||||
-- audited_entity_cui.
|
||||
--
|
||||
-- PRIMARY KEY:
|
||||
-- slug_id = sha1(category || '|' || slug). The numeric download ID is NULL
|
||||
-- until Stage 2 resolves it from the detail page. We keep it nullable + add
|
||||
-- a separate UNIQUE constraint when discovered.
|
||||
--
|
||||
-- Cross-source value (recipe drafts in CURTEACONT-PLAN.md):
|
||||
-- 1. "Autorități audited de N ori în 5 ani" — repeat-audit risk score.
|
||||
-- 2. "Spitale audited POST SEAP award" — paralelă cu CNAS cross-source.
|
||||
-- 3. "Rapoarte follow-up" — semnal că auditul anterior n-a fost remediat.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS curteacont;

-- ── Audit reports ───────────────────────────────────────────────────────────
-- One row per audit report listed by Curtea de Conturi. The listing-page slug
-- is the source of truth; the numeric PDF download_id and the other PDF-derived
-- columns stay NULL until Stage 2 fills them.
CREATE TABLE IF NOT EXISTS curteacont.rapoarte (
    -- Identity
    slug_id              char(40)    PRIMARY KEY,   -- sha1(category|slug)
    download_id          integer,                   -- /downloads/{id}, filled in Stage 2
    category             text        NOT NULL,      -- 'rapoarte-audit-financiar' | 'rapoarte-conformitate' | 'rapoarte-audit-performanta'
    slug                 text        NOT NULL,      -- last URL segment, unique within category
    detail_url           text        NOT NULL,      -- absolute URL to detail page

    -- Fields parsed from the listing title
    title                text        NOT NULL,      -- raw title from listing
    audit_type           text,                      -- 'financiar' | 'conformitate' | 'performanta' | 'control' | 'follow-up'
    audit_year           smallint,                  -- year the audit covers (e.g. 2024 in "pentru anul 2024")
    doc_number           text,                      -- "nr.27500" → "27500"
    doc_date             date,                      -- "07.04.2026" parsed
    audited_entity_name  text,                      -- raw extracted from title after the last comma
    audited_entity_cui   text,                      -- filled in Stage 2 via fuzzy match

    -- Taken from <time datetime="..."> on the listing card
    publication_date     date,

    -- Stage 2: detail page + PDF
    pdf_url              text,                      -- /rapoarte-audit/downloads/{id}
    pdf_size_bytes       bigint,                    -- parsed from "(X,YZ MB)"
    pdf_path             text,                      -- set only if the PDF is mirrored to satra disk (optional)
    summary              text,                      -- first-page abstract from PDF parse
    findings_count       integer,                   -- count of "constatări" from PDF parse

    -- Bookkeeping
    fetched_at           timestamptz NOT NULL DEFAULT now(),
    parsed_at            timestamptz                -- set when Stage 2 PDF parse completes
);
|
||||
|
||||
-- Natural key: a slug is unique inside its category.
CREATE UNIQUE INDEX IF NOT EXISTS rapoarte_category_slug_uniq
    ON curteacont.rapoarte (category, slug);

-- download_id is unique once known; rows still waiting for Stage 2 (NULL) are
-- left out of the index.
CREATE UNIQUE INDEX IF NOT EXISTS rapoarte_download_id_uniq
    ON curteacont.rapoarte (download_id)
    WHERE download_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS rapoarte_audit_year_idx
    ON curteacont.rapoarte (audit_year DESC NULLS LAST);

CREATE INDEX IF NOT EXISTS rapoarte_audit_type_idx
    ON curteacont.rapoarte (audit_type);

CREATE INDEX IF NOT EXISTS rapoarte_pub_date_idx
    ON curteacont.rapoarte (publication_date DESC NULLS LAST);

CREATE INDEX IF NOT EXISTS rapoarte_audited_cui_idx
    ON curteacont.rapoarte (audited_entity_cui)
    WHERE audited_entity_cui IS NOT NULL;

-- Trigram index for fuzzy matching on the audited entity name (Stage 2 CUI
-- resolution). pg_trgm must exist before gin_trgm_ops can be referenced.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

CREATE INDEX IF NOT EXISTS rapoarte_entity_trgm_idx
    ON curteacont.rapoarte
    USING gin (audited_entity_name gin_trgm_ops)
    WHERE audited_entity_name IS NOT NULL;
|
||||
|
||||
-- ── Scrape run log ──────────────────────────────────────────────────────────
-- One row per CLI invocation.
-- NOTE(review): no UNIQUE constraint on (started_at, category) is declared
-- here; if the writer relies on idempotent inserts over that pair it has to
-- enforce it itself — confirm against the scraper.
CREATE TABLE IF NOT EXISTS curteacont.scrape_runs (
    id             bigserial   PRIMARY KEY,
    category       text        NOT NULL,
    started_at     timestamptz NOT NULL DEFAULT now(),
    finished_at    timestamptz,
    -- per-run counters, all starting at zero
    pages_visited  integer     NOT NULL DEFAULT 0,
    rows_inserted  integer     NOT NULL DEFAULT 0,
    rows_updated   integer     NOT NULL DEFAULT 0,
    rows_skipped   integer     NOT NULL DEFAULT 0,
    last_error     text,
    notes          text
);

-- Newest-first listing of runs.
CREATE INDEX IF NOT EXISTS scrape_runs_started_idx
    ON curteacont.scrape_runs (started_at DESC);

COMMENT ON SCHEMA curteacont IS
    'Curtea de Conturi audit reports (https://www.curteadeconturi.ro/rapoarte-audit). Stage 1 = URL+metadata harvest from listing pages. Stage 2 = detail-page resolve + PDF parse + CUI fuzzy match.';

COMMENT ON TABLE curteacont.rapoarte IS
    'One row per audit report. PK is sha1(category|slug). Numeric download_id and PDF metadata filled in Stage 2 (see CURTEACONT-PLAN.md).';
|
||||
@@ -0,0 +1,235 @@
|
||||
-- 036_apia.sql
|
||||
-- APIA — Agenția de Plăți și Intervenție pentru Agricultură.
|
||||
-- "Lista fermierilor" — annual list of farmers receiving direct payments
|
||||
-- (subvenții) per UAT campaign. Covers ~each commune that publishes a list
|
||||
-- via data.gov.ro (currently 2024 only; one comuna live, more on the way).
|
||||
--
|
||||
-- Sources investigated 2026-05-10:
|
||||
-- 1. data.gov.ro CKAN — only ONE published "Lista fermieri APIA" XLSX
|
||||
-- lives at /api/3/action/package_show?id=lista-fermierilor-campania-apia-2024
|
||||
-- (single resource: comuna Găgești, jud. Vaslui, 192 farmers).
|
||||
-- The schema is per-comuna so future ingests over the same package
|
||||
-- will multiply rows linearly.
|
||||
-- 2. https://www.apia.org.ro/ — bot-blocked (HTTP 403 from non-browser
|
||||
-- User-Agents). National-level lists exist on APIA's site but require
|
||||
-- JS / browser session to retrieve. Out of scope for this pass.
|
||||
-- 3. AFIR FEGA dump (fonduri.afir_plati WHERE tip_fond='FEGA', 4.29M rows
|
||||
-- for 2023+2024) is the *closest* national equivalent — it contains
|
||||
-- payment amounts but no SUPRAFATA (hectares) and no
|
||||
-- RESPONSABIL UAT / CENTRUL APIA fields.
|
||||
--
|
||||
-- This schema is therefore intentionally narrow but extensible:
|
||||
-- - one row per (campaign_year, name, comuna, sat) — natural composite key
|
||||
-- - source_dataset_id + source_resource_id on every row → idempotent re-ingest
|
||||
-- - cui populated only when the row is a legal person (SC ... SRL / PFA);
|
||||
-- for natural persons (CNP-keyed in source) cui stays NULL
|
||||
--
|
||||
-- Cross-source value:
|
||||
-- apia.fermieri.cui × fonduri.afir_plati(tip_fond='FEGA').cui
|
||||
-- = "Fermier in lista APIA care apare ȘI in plățile FEGA AFIR" — sanity
|
||||
-- check duplicate-receipt audit. APIA list shows hectares declared,
|
||||
-- FEGA shows EUR plătiți; ratio EUR/ha → outliers.
|
||||
-- apia.fermieri.cui × anaf.datornici.cui
|
||||
-- = "Fermier (PFA/SRL) cu datorii la stat care primește subvenții APIA"
|
||||
-- — direct red flag.
|
||||
-- apia.fermieri.name (PF, no CUI) × ani.declaratii.persoana_name
|
||||
-- = persoane cu funcții publice care primesc subvenții agricole.
|
||||
-- apia.fermieri.cui × seap.announcements.supplier_cui
|
||||
-- = ferme care iau și subvenții și contracte publice.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS apia;

-- ── 1. Farmer list ────────────────────────────────────────────────────────
-- One row per farmer × campaign × comuna × sat. Uniqueness is composite
-- because the data.gov.ro source carries no national ID column (no CNP/CUI
-- per row). 'name' is the raw "NUME PRENUME" string from the published XLSX.
CREATE TABLE IF NOT EXISTS apia.fermieri (
    id                  bigserial     PRIMARY KEY,

    -- Who / when
    campaign_year       smallint      NOT NULL,  -- e.g. 2024 (the row also carries a 2023 SUPRAFATA value, but campaign year is the publication year)
    name                text          NOT NULL,  -- raw "NUME PRENUME" or "SC ... SRL"
    name_normalized     text,                    -- firms.normalize_company_name(name) — only when the name looks like a legal person
    cui                 text,                    -- only if matched to firms.entities (legal-person rows like "SC X SRL")
    cui_match_method    text,                    -- 'exact_norm' | 'trgm_unique' | NULL
    cui_match_score     numeric(4,3),
    is_legal_person     boolean,                 -- guessed from name shape (SC, SRL, PFA, II, IF, SA prefixes/suffixes)

    -- Where
    judet               text,                    -- enriched via centru_apia mapping (Găgești → VS Vaslui)
    comuna_oras         text,                    -- raw "COMUNA/ORAS" cell
    sat                 text,
    centru_apia         text,                    -- "CENTRUL APIA" (e.g. MURGENI)
    responsabil_uat     text,                    -- "RESPONSABIL UAT 2024" (the UAT employee, not the farmer)

    -- What
    suprafata_ha        numeric(12,4),           -- "SUPRAFATA 2023" hectares, decimals allowed (e.g. 1.04, 12.45)

    -- Provenance
    source_dataset_id   text          NOT NULL,  -- CKAN package_id, e.g. 'lista-fermierilor-campania-apia-2024'
    source_resource_id  text          NOT NULL,  -- CKAN resource_id (UUID)
    source_url          text          NOT NULL,  -- direct XLSX download URL
    fetched_at          timestamptz   NOT NULL DEFAULT now(),

    -- NULLS NOT DISTINCT makes a NULL sat count as one value, so a farmer whose
    -- sat is omitted by the source cannot be inserted twice.
    UNIQUE NULLS NOT DISTINCT (campaign_year, name, comuna_oras, sat)
);
|
||||
|
||||
-- Lookup / filter indexes on apia.fermieri.
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_cui
    ON apia.fermieri (cui)
    WHERE cui IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_apia_fermieri_year
    ON apia.fermieri (campaign_year);

CREATE INDEX IF NOT EXISTS idx_apia_fermieri_judet
    ON apia.fermieri (judet);

CREATE INDEX IF NOT EXISTS idx_apia_fermieri_comuna
    ON apia.fermieri (comuna_oras);

CREATE INDEX IF NOT EXISTS idx_apia_fermieri_centru
    ON apia.fermieri (centru_apia);

CREATE INDEX IF NOT EXISTS idx_apia_fermieri_resource
    ON apia.fermieri (source_resource_id);

CREATE INDEX IF NOT EXISTS idx_apia_fermieri_suprafata
    ON apia.fermieri (suprafata_ha DESC NULLS LAST);

-- Trigram index on the normalized name (requires the pg_trgm extension).
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_name_trgm
    ON apia.fermieri
    USING gin (name_normalized gin_trgm_ops);

-- Catalog documentation (adjacent literals are concatenated by the parser).
COMMENT ON TABLE apia.fermieri IS
    'Lista fermierilor publicată de APIA prin UAT-uri pe data.gov.ro. '
    'Sursă primară: CKAN package "lista-fermierilor-campania-apia-2024". '
    'Cross-source cu fonduri.afir_plati (FEGA) și anaf.datornici.';

COMMENT ON COLUMN apia.fermieri.suprafata_ha IS
    'Hectare declarate — coloana "SUPRAFATA 2023" (precedent campaign).';

COMMENT ON COLUMN apia.fermieri.is_legal_person IS
    'true = nume conține markeri "SC/SRL/SA/PFA/II/IF/CABINET" → potențial match firms.entities.';

COMMENT ON COLUMN apia.fermieri.cui_match_method IS
    'exact_norm = match exact pe firms.normalize_company_name; '
    'trgm_unique = match trigram unic peste 0.85; NULL = nepotrivit (probabil PF).';
|
||||
|
||||
-- ── 1b. Staging table (importer's COPY → INSERT pipeline) ───────────────
-- Every column is untyped text and nullable, so raw spreadsheet cells can be
-- loaded as-is before being cast into apia.fermieri.
CREATE TABLE IF NOT EXISTS apia.staging_fermieri (
    campaign_year       text,
    name                text,
    comuna_oras         text,
    sat                 text,
    centru_apia         text,
    responsabil_uat     text,
    suprafata_ha        text,
    source_dataset_id   text,
    source_resource_id  text,
    source_url          text
);

COMMENT ON TABLE apia.staging_fermieri IS
    'Tabel de staging pentru importul XLSX→COPY. TRUNCATE între import-uri.';
|
||||
|
||||
-- ── 2. Scrape log ─────────────────────────────────────────────────────────
-- One row per CKAN-resource ingest; serves as the "what was imported when"
-- history.
CREATE TABLE IF NOT EXISTS apia.scrape_log (
    id                  bigserial   PRIMARY KEY,
    -- which resource was ingested
    source_dataset_id   text        NOT NULL,
    source_resource_id  text        NOT NULL,
    source_url          text        NOT NULL,
    campaign_year       smallint    NOT NULL,
    -- counters, all starting at zero
    rows_seen           integer     NOT NULL DEFAULT 0,
    rows_inserted       integer     NOT NULL DEFAULT 0,
    rows_updated        integer     NOT NULL DEFAULT 0,
    rows_matched_cui    integer     NOT NULL DEFAULT 0,
    duration_ms         integer     NOT NULL DEFAULT 0,
    -- started_at has no default and must be supplied by the writer;
    -- finished_at defaults to the insert time
    started_at          timestamptz NOT NULL,
    finished_at         timestamptz NOT NULL DEFAULT now(),
    error               text
);

CREATE INDEX IF NOT EXISTS idx_apia_scrape_log_started
    ON apia.scrape_log (started_at DESC);

CREATE INDEX IF NOT EXISTS idx_apia_scrape_log_resource
    ON apia.scrape_log (source_resource_id);
|
||||
|
||||
-- ── 3. Materialized view: per-CUI rollup ─────────────────────────────────
-- Aggregates apia.fermieri rows that already carry a CUI: campaign years,
-- counties, total declared area, row count and most recent fetch time.
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY apia.mv_per_cui;
CREATE MATERIALIZED VIEW IF NOT EXISTS apia.mv_per_cui AS
SELECT
    f.cui
  , array_agg(DISTINCT f.campaign_year ORDER BY f.campaign_year)     AS campaign_years
  , array_agg(DISTINCT f.judet) FILTER (WHERE f.judet IS NOT NULL)   AS judete
  , SUM(f.suprafata_ha)                                              AS total_suprafata_ha
  , COUNT(*)                                                         AS rows_count
  , MAX(f.fetched_at)                                                AS last_seen_at
FROM apia.fermieri AS f
WHERE f.cui IS NOT NULL
GROUP BY f.cui;

-- A unique index is a prerequisite for REFRESH ... CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_apia_mv_per_cui
    ON apia.mv_per_cui (cui);

COMMENT ON MATERIALIZED VIEW apia.mv_per_cui IS
    'Rollup APIA per CUI (doar PJ-uri cu match). '
    'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY apia.mv_per_cui.';
|
||||
|
||||
-- ── 4. CUI matcher (call after each ingest) ──────────────────────────────
-- Populates apia.fermieri.cui by joining name_normalized against
-- firms.entities. Conservative: a row is only resolved when exactly one
-- firm qualifies (no ambiguous collisions).
--
-- Steps:
--   1. Flag rows whose name contains a legal-form marker as is_legal_person
--      and compute name_normalized for them (only rows still NULL).
--   2. Exact match on name_normalized            → cui_match_method 'exact_norm'.
--   3. Trigram match (similarity >= 0.85, unique) → cui_match_method 'trgm_unique'.
--
-- Returns one row:
--   matched_count   = rows resolved in step 2 (exact_norm)
--   ambiguous_count = rows resolved in step 3 (trgm_unique). The column name
--                     is historical and kept for caller compatibility; it does
--                     NOT count ambiguous candidates.
--
-- The function-level SET makes the pg_trgm `%` operator use the same 0.85
-- cut-off as the explicit filter, so far fewer candidate pairs are produced
-- than at the session default; the setting is restored on function exit.
CREATE OR REPLACE FUNCTION apia.match_cui()
RETURNS TABLE(matched_count bigint, ambiguous_count bigint)
LANGUAGE plpgsql
SET pg_trgm.similarity_threshold = 0.85
AS $$
DECLARE
  v_matched   bigint := 0;   -- rows resolved by exact normalized-name match
  v_ambiguous bigint := 0;   -- rows resolved by the unique-trigram fallback
BEGIN
  -- Heuristic: row is a legal-person candidate if its name contains
  -- SRL/SA/PFA/II/IF/SC/CABINET/COOPERATIVA/COOP as a whole word.
  -- Dotted forms are written WITHOUT their final dot: \M needs a word
  -- character immediately before it, so "S\.R\.L\.\M" could never match,
  -- whereas "S\.R\.L\M" matches both "S.R.L" and "S.R.L.".
  -- NOTE(review): dotted "I.I" / "I.F" now also match personal initials
  -- written that way; such rows only become match candidates, they are not
  -- assigned a CUI unless a firm name matches.
  UPDATE apia.fermieri f
     SET is_legal_person = TRUE,
         name_normalized = firms.normalize_company_name(f.name)
   WHERE f.is_legal_person IS NULL
     AND f.name ~* '\m(SRL|S\.R\.L|SA|S\.A|PFA|P\.F\.A|II|I\.I|IF|I\.F|SC|S\.C|CABINET|COOPERATIVA|COOP)\M';

  -- Step 2: exact normalized-name match; keep only farmers that map to a
  -- single distinct CUI.
  WITH cands AS (
    SELECT f.id, e.cui
      FROM apia.fermieri f
      JOIN firms.entities e
        ON e.name_normalized = f.name_normalized
     WHERE f.cui IS NULL
       AND f.is_legal_person = TRUE
       AND f.name_normalized IS NOT NULL
  ),
  uniq AS (
    SELECT id, MIN(cui) AS cui
      FROM cands
     GROUP BY id
    HAVING COUNT(DISTINCT cui) = 1
  ),
  upd AS (
    UPDATE apia.fermieri f
       SET cui              = u.cui,
           cui_match_method = 'exact_norm',
           cui_match_score  = 1.0
      FROM uniq u
     WHERE f.id = u.id
    RETURNING f.id
  )
  SELECT COUNT(*) INTO v_matched FROM upd;

  -- Step 3: trigram fallback for legal persons still unmatched. The explicit
  -- similarity filter is kept so the result does not depend on the GUC.
  WITH cands AS (
    SELECT f.id, e.cui,
           similarity(e.name_normalized, f.name_normalized) AS sim
      FROM apia.fermieri f
      JOIN firms.entities e
        ON e.name_normalized % f.name_normalized
     WHERE f.cui IS NULL
       AND f.is_legal_person = TRUE
       AND f.name_normalized IS NOT NULL
       AND similarity(e.name_normalized, f.name_normalized) >= 0.85
  ),
  ranked AS (
    SELECT id, cui, sim,
           COUNT(*) OVER (PARTITION BY id) AS n_cands
      FROM cands
  ),
  uniq AS (
    -- Only farmers with exactly one qualifying candidate survive.
    SELECT DISTINCT ON (id) id, cui, sim
      FROM ranked
     WHERE n_cands = 1
  ),
  upd AS (
    UPDATE apia.fermieri f
       SET cui              = u.cui,
           cui_match_method = 'trgm_unique',
           cui_match_score  = u.sim
      FROM uniq u
     WHERE f.id = u.id
    RETURNING f.id
  )
  SELECT COUNT(*) INTO v_ambiguous FROM upd;

  matched_count   := v_matched;
  ambiguous_count := v_ambiguous;
  RETURN NEXT;
END;
$$;

COMMENT ON FUNCTION apia.match_cui() IS
  'Matchează apia.fermieri.cui prin firms.entities. Întâi exact_norm, apoi '
  'trgm fallback >=0.85 cu unicitate. Returnează (matched_count = potriviri exact_norm, '
  'ambiguous_count = potriviri trgm_unique). '
  'Doar rândurile marcate is_legal_person.';
|
||||
@@ -0,0 +1,115 @@
|
||||
-- 037_gnm.sql
|
||||
-- GNM — Garda Națională de Mediu.
|
||||
-- Public press releases of environmental enforcement actions scraped from
|
||||
-- gnm.ro/feed/ (WordPress RSS, ~358 items / 36 pages).
|
||||
--
|
||||
-- Investigation summary (2026-05-10):
|
||||
-- • The institution publishes only AGGREGATE statistics (per-month / per-judet
|
||||
-- totals) in their monthly synthesis PDFs (sinteza_<luna>_<an>.pdf) and the
|
||||
-- annual activity report (raport_activitate_<an>.pdf). NO per-firm registry
|
||||
-- is published with CUIs and individual fine amounts.
|
||||
-- • The only place where individual violators are named is in press releases
|
||||
-- ("comunicate de presă"). Even there:
|
||||
-- – Most releases reference "doi operatori", "șapte operatori în patru
|
||||
-- județe" without naming firms.
|
||||
-- – When firms are named (e.g. Petrobrazi, Vega, Lukoil refineries), the
|
||||
-- individual amount is rarely broken down — they receive a collective
|
||||
-- "€340,000 în ultimul an" figure.
|
||||
-- – CUIs are NEVER published; we must fuzzy-match on company name +
|
||||
-- judet via cui_matcher (Stage B of the pipeline).
|
||||
-- • data.gov.ro has 0 GNM datasets; ANPM publishes IPPC/SEVESO inventories
|
||||
-- (which we ingest separately) but no fines.
|
||||
--
|
||||
-- Conclusion: this is a partial / sample-quality dataset. We capture every
|
||||
-- press release as gnm.comunicate, then run a regex extractor to surface
|
||||
-- candidate (company, fine_lei, fapta) tuples into gnm.amenzi_extrase. The
|
||||
-- coverage will be ~5-15% of total GNM enforcement activity (estimated 5K
|
||||
-- fines/year, of which only ~50-200 firms are named publicly per year).
|
||||
--
|
||||
-- The cross-source value remains: any firm publicly shamed by GNM that ALSO
|
||||
-- wins SEAP construction/industrial contracts is a 1st-page scandal pattern.
|
||||
-- We accept that we miss the long tail; we capture the headlines.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS gnm;

-- ── 1. Press releases (one row per gnm.ro article) ─────────────────────────
-- Stores each GNM communicate in full. Acts both as a raw archive (so the
-- interpretation rules can change later) and as the parent of the extracted
-- violator rows.
CREATE TABLE IF NOT EXISTS gnm.comunicate (
    id                bigserial   PRIMARY KEY,
    guid              text        NOT NULL UNIQUE,  -- WordPress GUID (stable post id)
    url               text        NOT NULL,
    titlu             text        NOT NULL,
    publicat_la       timestamptz,                  -- pubDate from RSS
    autor             text,                         -- dc:creator
    categorii         text[],                       -- e.g. {COMUNICATE DE PRESĂ, NOUTĂȚI}
    continut_html     text,                         -- raw content:encoded
    continut_text     text,                         -- HTML-stripped, line-collapsed
    -- true if title/body matches /amenz|sancțiun|sancțiun|sistare|confiscat/i
    -- NOTE(review): the repeated alternative may originally have been a
    -- cedilla or ASCII spelling variant — confirm against the scraper.
    is_enforcement    boolean     NOT NULL DEFAULT false,
    total_amenzi_lei  numeric,                      -- sum mentioned in article (best-effort)
    raw_hash          text        NOT NULL,         -- sha1(continut_text) for change detection
    fetched_at        timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_gnm_com_publicat
    ON gnm.comunicate (publicat_la DESC);

-- Partial index: only the rows flagged as enforcement articles.
CREATE INDEX IF NOT EXISTS idx_gnm_com_enforcement
    ON gnm.comunicate (is_enforcement)
    WHERE is_enforcement;

CREATE INDEX IF NOT EXISTS idx_gnm_com_total_amenzi
    ON gnm.comunicate (total_amenzi_lei DESC NULLS LAST);

COMMENT ON TABLE gnm.comunicate IS
    'GNM press releases (gnm.ro/feed/). Source-of-truth raw archive. Articles flagged is_enforcement when text mentions fines/sanctions; gnm.amenzi_extrase populated by NLP-light extractor.';
|
||||
|
||||
-- ── 2. Extracted violator records ──────────────────────────────────────────
-- One row per (article × candidate firm) tuple found by the regex/NER pass.
-- Most enforcement articles name 0-3 firms; some name none (collective
-- references like "operatori industriali din Prahova").
--
-- contravenient_cui is filled by the Stage B fuzzy match against
-- firms.cui_lookup using contravenient_name + judet hint. Score ≥ 0.85 is
-- acceptable.
CREATE TABLE IF NOT EXISTS gnm.amenzi_extrase (
    id                       bigserial   PRIMARY KEY,
    -- parent article; extracted rows are removed together with it
    comunicat_id             bigint      NOT NULL
                                 REFERENCES gnm.comunicate (id) ON DELETE CASCADE,

    -- Who
    contravenient_name       text        NOT NULL,  -- raw mention (e.g. "Rafinăria Petrobrazi")
    contravenient_name_norm  text,                  -- firms.normalize_company_name(); NULL until Stage B
    contravenient_cui        text,                  -- fuzzy-matched, NULL when unmatched
    cui_match_method         text,                  -- 'direct' | 'fuzzy_name' | 'fuzzy_name_judet' | NULL
    cui_match_score          numeric,               -- 0..1
    matched_at               timestamptz,

    -- Where / what
    judet                    text,                  -- inferred from article title/body
    fapta                    text,                  -- short violation description (extracted snippet)

    -- How much
    suma_lei                 numeric,               -- per-firm amount when present, NULL when only aggregate
    suma_eur                 numeric,               -- when source quotes EUR (rare)
    suma_aggregate           boolean     NOT NULL DEFAULT false,  -- true when amount applies to >1 firm collectively

    -- Evidence
    context_snippet          text        NOT NULL,  -- the sentence(s) that triggered extraction
    fetched_at               timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_cui
    ON gnm.amenzi_extrase (contravenient_cui)
    WHERE contravenient_cui IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_name_norm
    ON gnm.amenzi_extrase (contravenient_name_norm);

CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_judet
    ON gnm.amenzi_extrase (judet);

CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_suma
    ON gnm.amenzi_extrase (suma_lei DESC NULLS LAST);

CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_comunicat
    ON gnm.amenzi_extrase (comunicat_id);

COMMENT ON TABLE gnm.amenzi_extrase IS
    'Best-effort extracted violator tuples from gnm.comunicate. Coverage is partial — only firms named in press releases. Use gnm.comunicate.is_enforcement for full enforcement-article archive.';
|
||||
|
||||
-- ── 3. Scrape log (mirrors anre/ancom convention) ──────────────────────────
-- One row per scraper/extractor run with its counters and timing.
CREATE TABLE IF NOT EXISTS gnm.scrape_log (
    id             bigserial   PRIMARY KEY,
    scraper        text        NOT NULL,            -- 'rss_feed' | 'extractor'
    source_url     text        NOT NULL,
    -- counters, all starting at zero
    rows_seen      integer     NOT NULL DEFAULT 0,
    rows_inserted  integer     NOT NULL DEFAULT 0,
    rows_updated   integer     NOT NULL DEFAULT 0,
    rows_skipped   integer     NOT NULL DEFAULT 0,
    duration_ms    integer     NOT NULL DEFAULT 0,
    -- started_at has no default and must be supplied by the writer;
    -- finished_at defaults to the insert time
    started_at     timestamptz NOT NULL,
    finished_at    timestamptz NOT NULL DEFAULT now(),
    error          text
);

CREATE INDEX IF NOT EXISTS idx_gnm_scrape_log_started
    ON gnm.scrape_log (started_at DESC);
|
||||
@@ -0,0 +1,98 @@
|
||||
-- 038_bugetar_cui_match_stage_b.sql
|
||||
-- Stage B (fuzzy) CUI match for bugetar.entitate using judet constraint.
|
||||
-- After this runs, expected post-match coverage: 58% → ~70-75% (estimate
|
||||
-- ~1,500 new matches added via trgm_judet method at threshold 0.78).
|
||||
--
|
||||
-- IMPORTANT: this is a LONG-RUNNING SQL (~30-60 min on full table) because
|
||||
-- it does fuzzy matching across ~7,857 unmatched bugetar rows × millions of
|
||||
-- firms.entities rows. It's batched per judet to keep the query plan tractable.
|
||||
--
|
||||
-- Idempotent — UPDATEs only WHERE cui IS NULL.
|
||||
-- Run via: psql -f 038_bugetar_cui_match_stage_b.sql
|
||||
|
||||
-- Lookup: county code (as stored in bugetar.entitate.judet) → upper-case
-- ASCII county name, the form firms.adr_judet is normalized to further down
-- (firms uses the old cedilla characters Ş/Ţ; diacritics are stripped to
-- ASCII before comparing).
DROP TABLE IF EXISTS tmp_judet_map;

CREATE TEMP TABLE tmp_judet_map (
    code        text PRIMARY KEY,
    name_ascii  text NOT NULL
);

INSERT INTO tmp_judet_map (code, name_ascii)
VALUES
    ('AB', 'ALBA'),            ('AG', 'ARGES'),          ('AR', 'ARAD'),
    ('B',  'MUNICIPIUL BUCURESTI'),
    ('BC', 'BACAU'),           ('BH', 'BIHOR'),          ('BN', 'BISTRITA-NASAUD'),
    ('BR', 'BRAILA'),          ('BT', 'BOTOSANI'),       ('BV', 'BRASOV'),
    ('BZ', 'BUZAU'),           ('CJ', 'CLUJ'),           ('CL', 'CALARASI'),
    ('CS', 'CARAS-SEVERIN'),   ('CT', 'CONSTANTA'),      ('CV', 'COVASNA'),
    ('DB', 'DAMBOVITA'),       ('DJ', 'DOLJ'),           ('GJ', 'GORJ'),
    ('GL', 'GALATI'),          ('GR', 'GIURGIU'),        ('HD', 'HUNEDOARA'),
    ('HR', 'HARGHITA'),        ('IF', 'ILFOV'),          ('IL', 'IALOMITA'),
    ('IS', 'IASI'),            ('MH', 'MEHEDINTI'),      ('MM', 'MARAMURES'),
    ('MS', 'MURES'),           ('NT', 'NEAMT'),          ('OT', 'OLT'),
    ('PH', 'PRAHOVA'),         ('SB', 'SIBIU'),          ('SJ', 'SALAJ'),
    ('SM', 'SATU MARE'),       ('SV', 'SUCEAVA'),        ('TL', 'TULCEA'),
    ('TM', 'TIMIS'),           ('TR', 'TELEORMAN'),      ('VL', 'VALCEA'),
    ('VN', 'VRANCEA'),         ('VS', 'VASLUI');
|
||||
|
||||
-- Materialize firms.entities with an upper-case, diacritic-free county name
-- (judet_ascii) so the per-county trigram join does not recompute
-- translate() on every probe.
--
-- The translate() map covers BOTH cases of ş/ș/ţ/ț/ă/â/î. The previous map
-- listed only the upper-case forms of Ş/Ș/Ţ/Ț, so a mixed-case value such as
-- "Argeş" kept its diacritic after UPPER() and never equalled the ASCII
-- names in tmp_judet_map.
DROP TABLE IF EXISTS tmp_firms_by_judet;

CREATE TEMP TABLE tmp_firms_by_judet AS
SELECT cui,
       name,
       name_normalized,
       UPPER(translate(COALESCE(adr_judet, ''),
                       'ŞȘŢȚĂÂÎşșţțăâî',
                       'SSTTAAISSTTAAI')) AS judet_ascii
  FROM firms.entities
 WHERE name_normalized IS NOT NULL
   AND adr_judet IS NOT NULL;

-- Equality filter on county + trigram probe on the normalized name.
CREATE INDEX ON tmp_firms_by_judet (judet_ascii);
CREATE INDEX ON tmp_firms_by_judet USING gin (name_normalized gin_trgm_ops);

-- Fresh statistics so the planner sees the new table's real size.
ANALYZE tmp_firms_by_judet;
|
||||
|
||||
-- Snapshot of bugetar.entitate rows that still lack a CUI, each with its
-- normalized name and the ASCII county name it is expected to match.
-- Rows whose judet code is absent from tmp_judet_map drop out of the join.
DROP TABLE IF EXISTS tmp_bugetar_unmatched;

CREATE TEMP TABLE tmp_bugetar_unmatched AS
SELECT b.id,
       b.entity_name,
       jm.name_ascii                                AS expected_judet,
       firms.normalize_company_name(b.entity_name)  AS bn_norm
  FROM bugetar.entitate AS b
  INNER JOIN tmp_judet_map AS jm
          ON jm.code = b.judet
 WHERE b.cui IS NULL;

CREATE INDEX ON tmp_bugetar_unmatched (expected_judet);
|
||||
|
||||
-- Per-county UPDATE loop in a DO block: one statement per county keeps each
-- trigram join bounded. The `%` operator below honours this threshold.
SET pg_trgm.similarity_threshold = 0.78;

DO $$
DECLARE
  jcode         text;          -- county code, used only for the progress notice
  jname         text;          -- ASCII county name used for both joins
  updated       integer;       -- rows updated for the current county
  total_updated integer := 0;  -- running total across all counties
BEGIN
  FOR jcode, jname IN SELECT code, name_ascii FROM tmp_judet_map ORDER BY code LOOP
    WITH unmatched_in_judet AS (
      SELECT id, bn_norm FROM tmp_bugetar_unmatched WHERE expected_judet = jname
    ),
    candidates AS (
      -- Best-scoring firm per bugetar row. Ties on score are broken by cui so
      -- reruns pick the same firm (same tie-break rule as the 039 script).
      SELECT DISTINCT ON (u.id) u.id, f.cui,
             similarity(f.name_normalized, u.bn_norm) AS score
      FROM unmatched_in_judet u
      JOIN tmp_firms_by_judet f
        ON f.judet_ascii = jname AND f.name_normalized % u.bn_norm
      ORDER BY u.id, similarity(f.name_normalized, u.bn_norm) DESC, f.cui
    )
    UPDATE bugetar.entitate b
       SET cui              = c.cui,
           cui_match_score  = c.score::numeric(5,2),
           cui_match_method = 'trgm_judet',
           updated_at       = now()
      FROM candidates c
     -- b.cui IS NULL keeps the script idempotent; the score filter repeats
     -- the threshold so the result does not depend on the GUC alone.
     WHERE b.id = c.id AND b.cui IS NULL AND c.score >= 0.78;

    GET DIAGNOSTICS updated = ROW_COUNT;
    total_updated := total_updated + updated;
    RAISE NOTICE ' judet=% (%) matched=%', jcode, jname, updated;
  END LOOP;
  RAISE NOTICE '=== total trgm_judet matches: % ===', total_updated;
END $$;
|
||||
|
||||
-- Final stats: matched rows per method, most frequent method first.
SELECT cui_match_method,
       count(*),
       round(avg(cui_match_score)::numeric, 2) AS avg_score
  FROM bugetar.entitate
 WHERE cui IS NOT NULL
 GROUP BY cui_match_method
 ORDER BY count(*) DESC;

-- Cleanup of the session-local work tables.
DROP TABLE tmp_judet_map,
           tmp_firms_by_judet,
           tmp_bugetar_unmatched;
|
||||
@@ -0,0 +1,62 @@
|
||||
-- 039_bugetar_uat_pattern_match.sql
|
||||
-- High-precision CUI match for bugetar.entitate UAT entries by stripping
|
||||
-- ONRC parenthetical suffix "X (PRIMARIA Y)" and comparing normalized names.
|
||||
--
|
||||
-- Replaces the trgm-based 038 attempt which was too slow (90+ min, low yield)
|
||||
-- and hit false positives on the COMUNA-PRIMARIA naming gap.
|
||||
--
|
||||
-- Insight: ONRC stores comune/orașe with parenthetical suffix:
|
||||
-- "Comuna Surduc (Primaria Comunei Surduc)"
|
||||
-- "COMUNA CIZER (PRIMARIA)"
|
||||
-- "Comuna Mesesenii de Jos (Primaria Mesesenii de Jos Jud. Salaj)"
|
||||
-- Bugetar entries are clean: "COMUNA SURDUC". Stripping ' (...)' from ONRC
|
||||
-- name and comparing normalized → exact match → high-confidence resolve.
|
||||
--
|
||||
-- Idempotent: UPDATEs only WHERE cui IS NULL. Threshold-agnostic.
|
||||
-- Test runtime: ~1.7s per judet, 42 judete → ~70s total.
|
||||
|
||||
\timing on
SET pg_trgm.similarity_threshold = 0.78;  -- not used by this script; pinned to the value 038 uses

-- Resolve bugetar.entitate rows that still lack a CUI by comparing their
-- normalized name with the firm name after dropping its trailing " (...)"
-- suffix, within the same county.
--
-- The translate() map covers both cases of ş/ș/ţ/ț/ă/â/î. The previous map
-- listed only the upper-case forms of Ş/Ș/Ţ/Ț, so a mixed-case adr_judet such
-- as "Argeş" kept its diacritic after UPPER() and never matched name_ascii.
WITH judet_map AS (
  -- county code → upper-case ASCII county name
  SELECT * FROM (VALUES
    ('AB','ALBA'),('AG','ARGES'),('AR','ARAD'),('B','MUNICIPIUL BUCURESTI'),
    ('BC','BACAU'),('BH','BIHOR'),('BN','BISTRITA-NASAUD'),('BR','BRAILA'),
    ('BT','BOTOSANI'),('BV','BRASOV'),('BZ','BUZAU'),('CJ','CLUJ'),
    ('CL','CALARASI'),('CS','CARAS-SEVERIN'),('CT','CONSTANTA'),('CV','COVASNA'),
    ('DB','DAMBOVITA'),('DJ','DOLJ'),('GJ','GORJ'),('GL','GALATI'),
    ('GR','GIURGIU'),('HD','HUNEDOARA'),('HR','HARGHITA'),('IF','ILFOV'),
    ('IL','IALOMITA'),('IS','IASI'),('MH','MEHEDINTI'),('MM','MARAMURES'),
    ('MS','MURES'),('NT','NEAMT'),('OT','OLT'),('PH','PRAHOVA'),
    ('SB','SIBIU'),('SJ','SALAJ'),('SM','SATU MARE'),('SV','SUCEAVA'),
    ('TL','TULCEA'),('TM','TIMIS'),('TR','TELEORMAN'),('VL','VALCEA'),
    ('VN','VRANCEA'),('VS','VASLUI')
  ) AS m(code, name_ascii)
),
candidates AS (
  SELECT DISTINCT ON (b.id) b.id, e.cui, e.name AS firm_name
  FROM bugetar.entitate b
  JOIN judet_map jm ON jm.code = b.judet
  JOIN firms.entities e
    ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) = firms.normalize_company_name(b.entity_name)
   AND UPPER(translate(COALESCE(e.adr_judet,''), 'ŞȘŢȚĂÂÎşșţțăâî', 'SSTTAAISSTTAAI')) = jm.name_ascii
  WHERE b.cui IS NULL
  ORDER BY b.id, e.cui  -- deterministic when multiple firms share normalized stripped name
)
UPDATE bugetar.entitate b
   SET cui              = c.cui,
       cui_match_score  = 0.95::numeric(5,2),  -- fixed score for a stripped-exact match
       cui_match_method = 'uat_pattern',
       updated_at       = now()
  FROM candidates c
 -- b.cui IS NULL keeps the script idempotent.
 WHERE b.id = c.id AND b.cui IS NULL;
|
||||
|
||||
-- Final stats: matched rows per method, most frequent method first.
SELECT cui_match_method,
       count(*),
       round(avg(cui_match_score)::numeric, 2) AS avg_score
  FROM bugetar.entitate
 WHERE cui IS NOT NULL
 GROUP BY cui_match_method
 ORDER BY count(*) DESC;

-- Rows still without a CUI after this pass.
SELECT 'unmatched' AS t,
       count(*)
  FROM bugetar.entitate
 WHERE cui IS NULL;
|
||||
@@ -0,0 +1,105 @@
|
||||
-- 040_curteacont_uat_pattern_match.sql
|
||||
-- High-precision CUI match for curteacont.rapoarte using UAT-pattern + strip-parens.
|
||||
--
|
||||
-- Curtea de Conturi uses specific abbreviations for territorial units:
|
||||
-- UATC X → COMUNA X
|
||||
-- UATJ X → JUDETUL X
|
||||
-- UATO X → ORASUL X / ORAS X
|
||||
-- UATM X → MUNICIPIUL X
|
||||
-- UAT SECTOR N (... BUCURESTI) → SECTOR N
|
||||
--
|
||||
-- ONRC stores these with parenthetical suffix indicating the operating body:
|
||||
-- "JUDETUL MARAMURES (CONSILIUL JUDETEAN MARAMURES)"
|
||||
-- "Comuna Surduc (Primaria Comunei Surduc)"
|
||||
-- "SECTOR 3 (PRIMARIA SECTOR 3 BUCURESTI)"
|
||||
--
|
||||
-- Strip ONRC " (...)" suffix and compare normalized → exact match.
|
||||
--
|
||||
-- Idempotent: UPDATEs only WHERE audited_entity_cui IS NULL.
|
||||
|
||||
\timing on
|
||||
SET pg_trgm.similarity_threshold = 0.78; -- safety reset
|
||||
|
||||
-- UAT-only slice of firms.entities, built once so the pass below can join on an
-- indexed, pre-normalized name (original author's estimate: ~10K rows).
DROP TABLE IF EXISTS tmp_firms_uat;

CREATE TEMP TABLE tmp_firms_uat AS
SELECT
    e.cui,
    e.name,
    -- name with everything from the first "(" onward removed, then normalized
    firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) AS norm_stripped
FROM firms.entities AS e
WHERE e.name ~* '^(COMUNA |JUDETUL |ORAS |ORASUL |MUNICIPIUL |SECTOR(UL)? [1-6])';

CREATE INDEX ON tmp_firms_uat (norm_stripped);

ANALYZE tmp_firms_uat;
|
||||
|
||||
-- Compute the expected ONRC-form name for each still-unmatched audited entity:
--   UATC X -> COMUNA X, UATJ X -> JUDETUL X, UATO X -> ORAS X,
--   UATM X -> MUNICIPIUL X, UAT SECTOR[UL] N ... -> SECTOR N
-- The result is normalized so it can be equality-joined to tmp_firms_uat.norm_stripped.
-- NOTE(review): UATO is mapped to 'ORAS X' only; whether ONRC rows stored as
-- 'ORASUL X' still match depends on firms.normalize_company_name — confirm.
DROP TABLE IF EXISTS tmp_cdc_uat;

CREATE TEMP TABLE tmp_cdc_uat AS
SELECT
    r.slug_id,
    r.audited_entity_name,
    firms.normalize_company_name(
        CASE
            WHEN r.audited_entity_name ~* '^UATC '
                THEN 'COMUNA ' || regexp_replace(r.audited_entity_name, '^UATC\s+', '', 'i')
            WHEN r.audited_entity_name ~* '^UATJ '
                THEN 'JUDETUL ' || regexp_replace(r.audited_entity_name, '^UATJ\s+', '', 'i')
            WHEN r.audited_entity_name ~* '^UATO '
                THEN 'ORAS ' || regexp_replace(r.audited_entity_name, '^UATO\s+', '', 'i')
            WHEN r.audited_entity_name ~* '^UATM '
                THEN 'MUNICIPIUL ' || regexp_replace(r.audited_entity_name, '^UATM\s+', '', 'i')
            WHEN r.audited_entity_name ~* '^UAT SECTOR(UL)? [1-6]'
                -- (?i) makes the extraction case-insensitive like the ~* test above;
                -- without it a mixed-case name enters this branch but substring()
                -- returns NULL, which nulls the whole expected name.
                THEN 'SECTOR ' || substring(r.audited_entity_name FROM '(?i)^UAT SECTOR(?:UL)? ([1-6])')
            ELSE NULL
        END
    ) AS expected_norm
FROM curteacont.rapoarte AS r
WHERE r.audited_entity_cui IS NULL
  AND r.audited_entity_name IS NOT NULL
  AND r.audited_entity_name ~* '^(UATC |UATJ |UATO |UATM |UAT SECTOR)';
|
||||
|
||||
-- Stats before update: UAT-pattern rows for which an expected name was produced.
SELECT count(*) AS unmapped_uat_rows
FROM tmp_cdc_uat
WHERE expected_norm IS NOT NULL;

-- Apply the match. One CUI per slug_id: when several firms share the same
-- normalized name, the first cui in sort order is taken so reruns agree.
WITH best_match AS (
    SELECT DISTINCT ON (cdc.slug_id)
        cdc.slug_id,
        uat.cui
    FROM tmp_cdc_uat AS cdc
    JOIN tmp_firms_uat AS uat
        ON uat.norm_stripped = cdc.expected_norm
    ORDER BY cdc.slug_id, uat.cui
)
UPDATE curteacont.rapoarte AS r
SET audited_entity_cui = m.cui,
    parsed_at = COALESCE(r.parsed_at, now())  -- keep an existing parsed_at
FROM best_match AS m
WHERE r.audited_entity_cui IS NULL            -- never overwrite an existing CUI
  AND r.slug_id = m.slug_id;
|
||||
|
||||
-- Fallback exact-match path for non-UAT names (ministries etc.): the normalized
-- audited_entity_name is compared with the normalized firms.entities.name after
-- the firm name's " (...)" suffix has been stripped.
WITH unmatched_other AS (
    SELECT
        r.slug_id,
        r.audited_entity_name,
        firms.normalize_company_name(r.audited_entity_name) AS norm
    FROM curteacont.rapoarte AS r
    WHERE r.audited_entity_cui IS NULL
      AND r.audited_entity_name IS NOT NULL
      AND r.audited_entity_name !~* '^(UATC |UATJ |UATO |UATM |UAT SECTOR)'
),
best_match AS (
    -- one CUI per slug_id; ties resolved by cui order
    SELECT DISTINCT ON (u.slug_id)
        u.slug_id,
        e.cui
    FROM unmatched_other AS u
    JOIN firms.entities AS e
        ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) = u.norm
    ORDER BY u.slug_id, e.cui
)
UPDATE curteacont.rapoarte AS r
SET audited_entity_cui = m.cui,
    parsed_at = COALESCE(r.parsed_at, now())
FROM best_match AS m
WHERE r.audited_entity_cui IS NULL
  AND r.slug_id = m.slug_id;
|
||||
|
||||
-- Final stats: total reports, reports with a CUI, and the matched percentage.
-- NULLIF guards the division so an empty curteacont.rapoarte yields pct = NULL
-- instead of aborting the script with a division-by-zero error.
SELECT
    count(*) AS total,
    count(audited_entity_cui) AS with_cui,
    round(100.0 * count(audited_entity_cui) / NULLIF(count(*), 0), 1) AS pct
FROM curteacont.rapoarte;
|
||||
|
||||
-- Refresh the per-audited rollup if exists (no MV defined yet for curteacont but
|
||||
-- the autoritate profile pulls live; no refresh needed)
|
||||
|
||||
DROP TABLE tmp_firms_uat, tmp_cdc_uat;
|
||||
@@ -0,0 +1,87 @@
|
||||
-- 041_curteacont_cleaned_name_match.sql
|
||||
-- Follow-up to 040 — handles the residual 131 rows whose audited_entity_name
|
||||
-- contains the prefix " ) privind raportul de audit al performantei nr.X, ENTITY"
|
||||
-- (scraper bug: parser kept the prefix instead of just the entity).
|
||||
--
|
||||
-- Extract the entity name via split-on-last-comma, then retry both
|
||||
-- UAT-pattern and strip-parens match.
|
||||
--
|
||||
-- Source bug should also be fixed in services/seap-scraper/src/scrape-curteacont.ts
|
||||
-- but that's a separate task; SQL repair lands the data improvement immediately.
|
||||
|
||||
\timing on
|
||||
|
||||
-- Unmatched reports whose name still carries the ") privind raportul ..." prefix.
-- clean_name is the text after the last comma, with surrounding spaces trimmed.
DROP TABLE IF EXISTS tmp_cdc_residue;

CREATE TEMP TABLE tmp_cdc_residue AS
SELECT
    r.slug_id,
    r.audited_entity_name,
    -- '[^,]*$' always matches: it is the (possibly empty) run of non-comma
    -- characters that ends the string, i.e. everything after the last comma
    trim(substring(r.audited_entity_name FROM '[^,]*$')) AS clean_name
FROM curteacont.rapoarte AS r
WHERE r.audited_entity_cui IS NULL
  AND r.audited_entity_name IS NOT NULL
  AND r.audited_entity_name ~ '\) privind raportul';
|
||||
|
||||
-- Pass 1: UAT-pattern match on the cleaned names.
-- Same prefix translation as 040 (UATC -> COMUNA, UATJ -> JUDETUL, UATO -> ORAS,
-- UATM -> MUNICIPIUL, UAT SECTOR[UL] N -> SECTOR N), then an equality join on the
-- normalized, parenthesis-stripped ONRC name. Matched rows also get their
-- audited_entity_name replaced by the text after the last comma.
WITH cleaned AS (
    SELECT
        t.slug_id,
        t.clean_name,
        firms.normalize_company_name(
            CASE
                WHEN t.clean_name ~* '^UATC ' THEN 'COMUNA ' || regexp_replace(t.clean_name, '^UATC\s+', '', 'i')
                WHEN t.clean_name ~* '^UATJ ' THEN 'JUDETUL ' || regexp_replace(t.clean_name, '^UATJ\s+', '', 'i')
                WHEN t.clean_name ~* '^UATO ' THEN 'ORAS ' || regexp_replace(t.clean_name, '^UATO\s+', '', 'i')
                WHEN t.clean_name ~* '^UATM ' THEN 'MUNICIPIUL ' || regexp_replace(t.clean_name, '^UATM\s+', '', 'i')
                WHEN t.clean_name ~* '^UAT SECTOR(UL)? [1-6]'
                    -- (?i): extract case-insensitively, matching the ~* test above;
                    -- otherwise a mixed-case name yields NULL here.
                    THEN 'SECTOR ' || substring(t.clean_name FROM '(?i)^UAT SECTOR(?:UL)? ([1-6])')
                ELSE NULL
            END
        ) AS expected_norm
    FROM tmp_cdc_residue AS t
),
firms_uat AS (
    SELECT
        e.cui,
        firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) AS norm_stripped
    FROM firms.entities AS e
    WHERE e.name ~* '^(COMUNA |JUDETUL |ORAS |ORASUL |MUNICIPIUL |SECTOR(UL)? [1-6])'
),
candidates AS (
    -- one CUI per slug_id; ties resolved by cui order
    SELECT DISTINCT ON (c.slug_id)
        c.slug_id,
        f.cui
    FROM cleaned AS c
    JOIN firms_uat AS f
        ON f.norm_stripped = c.expected_norm
    WHERE c.expected_norm IS NOT NULL
    ORDER BY c.slug_id, f.cui
)
UPDATE curteacont.rapoarte AS r
SET audited_entity_cui = c.cui,
    audited_entity_name = trim(reverse(split_part(reverse(r.audited_entity_name), ',', 1))), -- also fix the name field
    parsed_at = COALESCE(r.parsed_at, now())
FROM candidates AS c
WHERE r.slug_id = c.slug_id
  AND r.audited_entity_cui IS NULL;
|
||||
|
||||
-- Pass 2: exact match with parentheses stripped on both sides — the cleaned
-- report name and the ONRC firm name. Reads curteacont.rapoarte again, so rows
-- that received a CUI in pass 1 are no longer selected.
WITH residue AS (
    SELECT
        r.slug_id,
        -- text after the last comma, trimmed
        trim(substring(r.audited_entity_name FROM '[^,]*$')) AS clean_name
    FROM curteacont.rapoarte AS r
    WHERE r.audited_entity_cui IS NULL
      AND r.audited_entity_name ~ '\) privind raportul'
),
best_match AS (
    -- one CUI per slug_id; ties resolved by cui order
    SELECT DISTINCT ON (x.slug_id)
        x.slug_id,
        e.cui
    FROM residue AS x
    JOIN firms.entities AS e
        ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', ''))
         = firms.normalize_company_name(regexp_replace(x.clean_name, '\s*\(.*$', ''))
    ORDER BY x.slug_id, e.cui
)
UPDATE curteacont.rapoarte AS r
SET audited_entity_cui = m.cui,
    audited_entity_name = trim(reverse(split_part(reverse(r.audited_entity_name), ',', 1))),
    parsed_at = COALESCE(r.parsed_at, now())
FROM best_match AS m
WHERE r.audited_entity_cui IS NULL
  AND r.slug_id = m.slug_id;
|
||||
|
||||
-- Final stats: total reports, reports with a CUI, and the matched percentage.
-- NULLIF guards the division so an empty curteacont.rapoarte yields pct = NULL
-- instead of aborting the script with a division-by-zero error.
SELECT
    count(*) AS total,
    count(audited_entity_cui) AS with_cui,
    round(100.0 * count(audited_entity_cui) / NULLIF(count(*), 0), 1) AS pct
FROM curteacont.rapoarte;
|
||||
|
||||
DROP TABLE tmp_cdc_residue;
|
||||
@@ -0,0 +1,101 @@
|
||||
-- 042_cnsc_authority_cui_match.sql
|
||||
-- Backfill authority_cuis array on cnsc.decizii using the strip-parens +
|
||||
-- UAT-pattern strategy proven by bugetar 039 + curteacont 040.
|
||||
--
|
||||
-- Current state: 29,488 decizii, 12,527 (42%) have authority_cuis populated
|
||||
-- via the scraper's authority_cui_raw extraction. Remaining 16,961 (58%)
|
||||
-- have authority_name but no CUI.
|
||||
--
|
||||
-- CNSC names use these patterns:
|
||||
-- COMUNA X / ORASUL X / MUNICIPIUL X / JUDETUL X → UAT direct
|
||||
-- PRIMARIA COMUNEI X / PRIMARIA X → strip PRIMARIA, try UAT
|
||||
-- CONSILIUL JUDETEAN X / CJ X → "CONSILIUL JUDETEAN X" / "JUDETUL X"
|
||||
-- <COMPANY NAME> SA / SRL → direct firm name match
|
||||
-- <institution> — strip-parens fallback
|
||||
--
|
||||
-- This SQL UPDATEs authority_cuis = ARRAY[cui]::text[] when a match is found.
|
||||
-- The mv_per_authority_cui must be refreshed afterward.
|
||||
--
|
||||
-- Idempotent: only updates rows where authority_cuis IS NULL or empty.
|
||||
|
||||
\timing on
|
||||
|
||||
-- UAT firm cache, built once and reused by passes 2 and 3. Compared with the
-- curteacont variant the prefix filter also admits CONSILIUL JUDETEAN and PRIMARIA.
DROP TABLE IF EXISTS tmp_firms_uat;

CREATE TEMP TABLE tmp_firms_uat AS
SELECT
    e.cui,
    e.name,
    -- name with everything from the first "(" onward removed, then normalized
    firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) AS norm_stripped
FROM firms.entities AS e
WHERE e.name ~* '^(COMUNA |JUDETUL |ORAS |ORASUL |MUNICIPIUL |SECTOR(UL)? [1-6]|CONSILIUL JUDETEAN |PRIMARIA )';

CREATE INDEX ON tmp_firms_uat (norm_stripped);

ANALYZE tmp_firms_uat;
|
||||
|
||||
-- Unmatched authority rows (authority_cuis NULL or empty) with the normalized
-- forms each pass joins on:
--   direct_norm            authority_name as-is                      (pass 1)
--   primaria_stripped_norm PRIMARIA prefix rewritten to the UAT form (pass 2)
--   cj_norm                CONSILIUL JUDETEAN X -> JUDETUL X         (pass 3)
DROP TABLE IF EXISTS tmp_cnsc_unmatched;

CREATE TEMP TABLE tmp_cnsc_unmatched AS
SELECT
    d.id,
    d.authority_name,
    firms.normalize_company_name(d.authority_name) AS direct_norm,
    firms.normalize_company_name(
        -- "PRIMARIA COMUNEI X" must become "COMUNA X" (genitive -> nominative)
        -- to equal a UAT firm name; merely dropping "PRIMARIA COMUNEI" leaves a
        -- bare "X" while every tmp_firms_uat name starts with a type prefix.
        -- NOTE(review): assumes firms.normalize_company_name keeps that prefix,
        -- and uses 'ORAS ' (not 'ORASUL ') like the curteacont UATO mapping — confirm.
        CASE
            WHEN d.authority_name ~* '^PRIMARIA\s+COMUNEI\s+'
                THEN 'COMUNA ' || regexp_replace(d.authority_name, '^PRIMARIA\s+COMUNEI\s+', '', 'i')
            WHEN d.authority_name ~* '^PRIMARIA\s+ORASULUI\s+'
                THEN 'ORAS ' || regexp_replace(d.authority_name, '^PRIMARIA\s+ORASULUI\s+', '', 'i')
            WHEN d.authority_name ~* '^PRIMARIA\s+MUNICIPIULUI\s+'
                THEN 'MUNICIPIUL ' || regexp_replace(d.authority_name, '^PRIMARIA\s+MUNICIPIULUI\s+', '', 'i')
            WHEN d.authority_name ~* '^PRIMARIA\s+JUDETULUI\s+'
                THEN 'JUDETUL ' || regexp_replace(d.authority_name, '^PRIMARIA\s+JUDETULUI\s+', '', 'i')
            -- "PRIMARIA COMUNA X", "PRIMARIA X", or no PRIMARIA prefix at all:
            -- only the leading PRIMARIA (if any) is removed, as before
            ELSE regexp_replace(d.authority_name, '^PRIMARIA\s+', '', 'i')
        END
    ) AS primaria_stripped_norm,
    firms.normalize_company_name(
        CASE
            WHEN d.authority_name ~* '^CONSILIUL JUDETEAN '
                THEN 'JUDETUL ' || regexp_replace(d.authority_name, '^CONSILIUL JUDETEAN\s+', '', 'i')
            ELSE NULL
        END
    ) AS cj_norm
FROM cnsc.decizii AS d
WHERE (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL)
  AND d.authority_name IS NOT NULL;

CREATE INDEX ON tmp_cnsc_unmatched (direct_norm);
|
||||
|
||||
-- Pass 1: direct match — the normalized authority_name equals a firm name whose
-- " (...)" suffix has been stripped. Searches all of firms.entities.
WITH best_match AS (
    -- one CUI per decision; ties resolved by cui order
    SELECT DISTINCT ON (u.id)
        u.id,
        e.cui
    FROM tmp_cnsc_unmatched AS u
    JOIN firms.entities AS e
        ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) = u.direct_norm
    ORDER BY u.id, e.cui
)
UPDATE cnsc.decizii AS d
SET authority_cuis = ARRAY[m.cui]::text[]
FROM best_match AS m
WHERE (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL)  -- still unmatched
  AND d.id = m.id;
|
||||
|
||||
-- Pass 2: PRIMARIA-stripped match against the UAT firm cache
-- (PRIMARIA COMUNEI X → match COMUNA X / ORAS X etc.).
WITH best_match AS (
    SELECT DISTINCT ON (u.id)
        u.id,
        uat.cui
    FROM tmp_cnsc_unmatched AS u
    JOIN tmp_firms_uat AS uat
        ON uat.norm_stripped = u.primaria_stripped_norm
    -- skip rows where stripping PRIMARIA left the name unchanged
    WHERE u.primaria_stripped_norm <> u.direct_norm
    ORDER BY u.id, uat.cui
)
UPDATE cnsc.decizii AS d
SET authority_cuis = ARRAY[m.cui]::text[]
FROM best_match AS m
WHERE (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL)  -- not filled by an earlier pass
  AND d.id = m.id;
|
||||
|
||||
-- Pass 3: CONSILIUL JUDETEAN X → JUDETUL X, matched against the UAT firm cache.
WITH best_match AS (
    SELECT DISTINCT ON (u.id)
        u.id,
        uat.cui
    FROM tmp_cnsc_unmatched AS u
    JOIN tmp_firms_uat AS uat
        ON uat.norm_stripped = u.cj_norm
    WHERE u.cj_norm IS NOT NULL
    ORDER BY u.id, uat.cui
)
UPDATE cnsc.decizii AS d
SET authority_cuis = ARRAY[m.cui]::text[]
FROM best_match AS m
WHERE (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL)  -- not filled by an earlier pass
  AND d.id = m.id;
|
||||
|
||||
-- Refresh the per-authority MV
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
|
||||
|
||||
-- Final stats: total decisions, decisions with a non-empty authority_cuis array,
-- and the matched percentage. NULLIF guards the division so an empty
-- cnsc.decizii yields pct = NULL instead of a division-by-zero error.
SELECT
    count(*) AS total,
    count(*) FILTER (WHERE array_length(authority_cuis, 1) > 0) AS with_auth_cui,
    round(100.0 * count(*) FILTER (WHERE array_length(authority_cuis, 1) > 0) / NULLIF(count(*), 0), 1) AS pct
FROM cnsc.decizii;
|
||||
|
||||
DROP TABLE tmp_firms_uat, tmp_cnsc_unmatched;
|
||||
@@ -0,0 +1,87 @@
|
||||
-- 043_red_flags_kpi_snapshot.sql
|
||||
-- Materialize red-flags landing KPI counters to a static refresh table.
|
||||
-- Original page was 30s with all KPI INTERSECTs + 13 recipe fetches running.
|
||||
-- KPI INTERSECTs (TRIPLE/QUADRA pipe etc.) alone = 12s. Now ~1ms read.
|
||||
--
|
||||
-- Refresh: nightly via mvs cron at 04:00 (or add to refresh-mvs.sh).
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS public_kpi;

-- One row per KPI counter, keyed by name; refreshed_at records when the value
-- was last written.
CREATE TABLE IF NOT EXISTS public_kpi.red_flags_counts (
    k_name       TEXT        PRIMARY KEY,
    k_value      BIGINT      NOT NULL,
    refreshed_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

COMMENT ON TABLE public_kpi.red_flags_counts IS
    'KPI counters surfaced on /achizitii/red-flags landing. Refreshed nightly. Page reads as a single SELECT to avoid 12s INTERSECT cost.';
|
||||
|
||||
-- Refresh function — called by cron.
-- Recomputes every red-flag KPI counter and upserts it into
-- public_kpi.red_flags_counts: existing k_name rows get a new k_value and
-- refreshed_at, missing ones are inserted.
CREATE OR REPLACE FUNCTION public_kpi.refresh_red_flags_counts() RETURNS void AS $$
BEGIN
    INSERT INTO public_kpi.red_flags_counts (k_name, k_value, refreshed_at)
    SELECT kpi.k_name, kpi.k_value, now()
    FROM (VALUES
        -- distinct state-aid (RegAS) recipients that are also SEAP suppliers
        ('regas_seap_firms',
            (SELECT count(DISTINCT cui)
             FROM regas.ajutoare
             WHERE cui IS NOT NULL
               AND cui IN (SELECT supplier_cui FROM seap.announcements WHERE supplier_cui IS NOT NULL))),

        -- state-aid records (not distinct firms) whose recipient is a SEAP supplier
        ('regas_seap_records',
            (SELECT count(*)
             FROM regas.ajutoare
             WHERE cui IS NOT NULL
               AND cui IN (SELECT supplier_cui FROM seap.announcements WHERE supplier_cui IS NOT NULL))),

        -- distinct CUIs in fonduri.beneficiar_anunt that are also SEAP suppliers
        ('ben_seap_firms',
            (SELECT count(DISTINCT cui)
             FROM fonduri.beneficiar_anunt
             WHERE cui IS NOT NULL
               AND cui IN (SELECT supplier_cui FROM seap.announcements WHERE supplier_cui IS NOT NULL))),

        -- distinct ANAF debtors with a SEAP award over 100000 published after
        -- the debtor entry's publication date
        ('datornic_seap_firms',
            (SELECT count(DISTINCT d.cui)
             FROM anaf.datornici d
             JOIN seap.announcements a
               ON a.supplier_cui = d.cui
              AND a.publication_date::date > d.publication_date
              AND a.awarded_value > 100000)),

        -- distinct donor CUIs (aep.donatii_pj) that are SEAP suppliers with an
        -- award over 100000
        ('aep_seap_firms',
            (SELECT count(DISTINCT donator_cui)
             FROM aep.donatii_pj
             WHERE donator_cui IS NOT NULL
               AND donator_cui IN (SELECT supplier_cui
                                   FROM seap.announcements
                                   WHERE supplier_cui IS NOT NULL AND awarded_value > 100000))),

        -- CUIs present in all three: beneficiar_anunt, SEAP suppliers, afir_plati
        ('triple_firms',
            (SELECT count(*) FROM (
                SELECT b.cui FROM fonduri.beneficiar_anunt b WHERE cui IS NOT NULL
                INTERSECT SELECT supplier_cui FROM seap.announcements WHERE supplier_cui IS NOT NULL
                INTERSECT SELECT cui FROM fonduri.afir_plati WHERE cui IS NOT NULL
             ) t)),

        -- same as triple_firms, additionally present in regas.ajutoare
        ('quadra_firms',
            (SELECT count(*) FROM (
                SELECT b.cui FROM fonduri.beneficiar_anunt b WHERE cui IS NOT NULL
                INTERSECT SELECT supplier_cui FROM seap.announcements WHERE supplier_cui IS NOT NULL
                INTERSECT SELECT cui FROM fonduri.afir_plati WHERE cui IS NOT NULL
                INTERSECT SELECT cui FROM regas.ajutoare WHERE cui IS NOT NULL
             ) t)),

        -- distinct donor CUIs that appear in anaf.datornici_latest
        ('donator_datornic_firms',
            (SELECT count(DISTINCT a.donator_cui)
             FROM aep.donatii_pj a
             JOIN anaf.datornici_latest d ON d.cui = a.donator_cui)),

        -- distinct CUIs with at least one active ANRE licence that appear in
        -- anaf.datornici_latest
        ('anre_datornic_firms',
            (SELECT count(DISTINCT a.cui)
             FROM anre.mv_licente_per_cui a
             JOIN anaf.datornici_latest d ON d.cui = a.cui
             WHERE a.nr_active > 0)),

        -- CUIs with >= 2 Curtea de Conturi reports in the last 5 years that also
        -- have >= 3 contestations in cnsc.mv_per_authority_cui
        ('dubla_alerta_firms',
            (SELECT count(*) FROM (
                SELECT audited_entity_cui AS cui FROM curteacont.rapoarte
                WHERE audited_entity_cui IS NOT NULL AND publication_date >= now() - interval '5 years'
                GROUP BY audited_entity_cui HAVING count(*) >= 2
                INTERSECT
                SELECT cui FROM cnsc.mv_per_authority_cui WHERE contestation_count >= 3
             ) t))
    ) AS kpi (k_name, k_value)
    ON CONFLICT (k_name) DO UPDATE
        SET k_value = EXCLUDED.k_value,
            refreshed_at = EXCLUDED.refreshed_at;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Initial populate
|
||||
SELECT public_kpi.refresh_red_flags_counts();
|
||||
|
||||
SELECT k_name, k_value, refreshed_at FROM public_kpi.red_flags_counts ORDER BY k_name;
|
||||
@@ -0,0 +1,628 @@
|
||||
-- 044_red_flags_previews_snapshot.sql
|
||||
-- Materialize TOP-5 row previews per red-flag recipe to a snapshot table.
|
||||
-- Original landing was ~17s (13 recipe.fetch() calls running live, each 1-12s).
|
||||
-- Now: single SELECT against this table → ~5ms.
|
||||
--
|
||||
-- Refresh: nightly from refresh-mvs.sh after KPI snapshot refresh.
|
||||
--
|
||||
-- Mirrors the RecipeRow interface (src/lib/recipes.ts):
|
||||
-- primary, primaryHref, secondary, metric, metricRaw, detail, badge.{label,tone}
|
||||
--
|
||||
-- For each of the 13 RED_FLAG_SLUGS in src/pages/achizitii/red-flags.astro,
|
||||
-- we re-implement the SQL inline (returning the formatted output the page
|
||||
-- needs) and select TOP-5 per slug.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS public_kpi;

-- Snapshot rows keyed by (slug, position): slug identifies the recipe,
-- position the row's rank within it.
CREATE TABLE IF NOT EXISTS public_kpi.red_flags_previews (
    slug         TEXT        NOT NULL,
    position     SMALLINT    NOT NULL,
    primary_text TEXT,
    primary_href TEXT,
    secondary    TEXT,
    metric       TEXT,
    metric_raw   NUMERIC,
    detail       TEXT,
    badge_label  TEXT,
    badge_tone   TEXT,
    computed_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (slug, position)
);

COMMENT ON TABLE public_kpi.red_flags_previews IS
    'Top-5 row previews per red-flag recipe. Refreshed nightly via refresh-mvs.sh. Read once per landing page load instead of running 13 live cross-source queries.';
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- Helper: short-form RON amount formatter (stated by the original author to
-- mirror fmtRON() in src/lib/recipes.ts).
--   NULL or <= 0  -> '0'
--   >= 1e9        -> one decimal + ' mld'   e.g. "1.2 mld"
--   >= 1e6        -> one decimal + ' mil'   e.g. "12.3 mil"
--   >= 1e3        -> rounded thousands + 'K' e.g. "456K"
--   otherwise     -> the rounded integer     e.g. "789"
CREATE OR REPLACE FUNCTION public_kpi.fmt_ron(v numeric) RETURNS text AS $$
BEGIN
    -- branches are tested top-down, so each threshold only sees smaller values
    RETURN CASE
        WHEN v IS NULL OR v <= 0 THEN '0'
        WHEN v >= 1000000000     THEN to_char(v / 1000000000.0, 'FM999990.0') || ' mld'
        WHEN v >= 1000000        THEN to_char(v / 1000000.0, 'FM999990.0') || ' mil'
        WHEN v >= 1000           THEN to_char(round(v / 1000.0), 'FM999999990') || 'K'
        ELSE to_char(round(v), 'FM999999990')
    END;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
CREATE OR REPLACE FUNCTION public_kpi.refresh_red_flags_previews() RETURNS void AS $$
|
||||
BEGIN
|
||||
DELETE FROM public_kpi.red_flags_previews;
|
||||
|
||||
-- ───── firme-quadra-pipe-public ─────
|
||||
INSERT INTO public_kpi.red_flags_previews
|
||||
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
|
||||
WITH ben AS (
|
||||
SELECT b.cui, COUNT(*) AS anunturi_eu, SUM(l.buget_lei)::numeric AS buget_eu
|
||||
FROM fonduri.beneficiar_anunt b
|
||||
LEFT JOIN fonduri.beneficiar_anunt_lot l ON l.anunt_id = b.id
|
||||
WHERE b.cui IS NOT NULL GROUP BY b.cui
|
||||
),
|
||||
seap AS (
|
||||
SELECT supplier_cui AS cui, COUNT(*) AS contracte, SUM(awarded_value)::numeric AS valoare
|
||||
FROM seap.announcements
|
||||
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
|
||||
GROUP BY supplier_cui
|
||||
),
|
||||
afir AS (
|
||||
SELECT cui, COUNT(*) AS plati,
|
||||
SUM(COALESCE(ue_total, feadr_total, fega_total, op_amount, 0))::numeric AS afir_lei
|
||||
FROM fonduri.afir_plati WHERE cui IS NOT NULL GROUP BY cui
|
||||
),
|
||||
regas AS (
|
||||
SELECT cui, COUNT(*) AS nr_ajutoare,
|
||||
SUM(ajutor_acordat_subcategorie)::numeric AS regas_lei
|
||||
FROM regas.ajutoare WHERE cui IS NOT NULL GROUP BY cui
|
||||
),
|
||||
ranked AS (
|
||||
SELECT b.cui, e.name, e.adr_judet,
|
||||
b.buget_eu, s.valoare AS seap_lei, a.afir_lei, r.regas_lei,
|
||||
(COALESCE(b.buget_eu,0) + COALESCE(s.valoare,0) + COALESCE(a.afir_lei,0) + COALESCE(r.regas_lei,0))::numeric AS total_combined,
|
||||
ROW_NUMBER() OVER (ORDER BY (COALESCE(b.buget_eu,0) + COALESCE(s.valoare,0) + COALESCE(a.afir_lei,0) + COALESCE(r.regas_lei,0)) DESC NULLS LAST) AS rn
|
||||
FROM ben b
|
||||
JOIN seap s ON s.cui = b.cui
|
||||
JOIN afir a ON a.cui = b.cui
|
||||
JOIN regas r ON r.cui = b.cui
|
||||
JOIN firms.entities e ON e.cui = b.cui
|
||||
)
|
||||
SELECT 'firme-quadra-pipe-public', rn::smallint,
|
||||
COALESCE(name, 'CUI ' || cui),
|
||||
'/achizitii/firma/' || cui,
|
||||
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
|
||||
public_kpi.fmt_ron(total_combined) || ' RON',
|
||||
total_combined,
|
||||
'EU: ' || public_kpi.fmt_ron(COALESCE(buget_eu,0)) || ' · SEAP: ' || public_kpi.fmt_ron(COALESCE(seap_lei,0)) || ' · AFIR: ' || public_kpi.fmt_ron(COALESCE(afir_lei,0)) || ' · RegAS: ' || public_kpi.fmt_ron(COALESCE(regas_lei,0)) || ' RON',
|
||||
'🔱 QUADRA pipe', 'risk'
|
||||
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── firme-triplu-pipe-public ─────
|
||||
INSERT INTO public_kpi.red_flags_previews
|
||||
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
|
||||
WITH ben AS (
|
||||
SELECT b.cui, COUNT(*) AS anunturi_eu, SUM(l.buget_lei)::numeric AS buget_eu
|
||||
FROM fonduri.beneficiar_anunt b
|
||||
LEFT JOIN fonduri.beneficiar_anunt_lot l ON l.anunt_id = b.id
|
||||
WHERE b.cui IS NOT NULL GROUP BY b.cui
|
||||
),
|
||||
seap AS (
|
||||
SELECT supplier_cui AS cui, COUNT(*) AS contracte, SUM(awarded_value)::numeric AS valoare
|
||||
FROM seap.announcements
|
||||
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
|
||||
GROUP BY supplier_cui
|
||||
),
|
||||
afir AS (
|
||||
SELECT cui, COUNT(*) AS plati,
|
||||
SUM(COALESCE(ue_total, feadr_total, fega_total, op_amount, 0))::numeric AS afir_lei
|
||||
FROM fonduri.afir_plati WHERE cui IS NOT NULL GROUP BY cui
|
||||
),
|
||||
ranked AS (
|
||||
SELECT b.cui, e.name, e.adr_judet,
|
||||
b.anunturi_eu, b.buget_eu, s.contracte, s.valoare AS seap_lei, a.plati, a.afir_lei,
|
||||
(COALESCE(b.buget_eu,0) + COALESCE(s.valoare,0) + COALESCE(a.afir_lei,0))::numeric AS total_combined,
|
||||
ROW_NUMBER() OVER (ORDER BY (COALESCE(b.buget_eu,0) + COALESCE(s.valoare,0) + COALESCE(a.afir_lei,0)) DESC NULLS LAST) AS rn
|
||||
FROM ben b
|
||||
JOIN seap s ON s.cui = b.cui
|
||||
JOIN afir a ON a.cui = b.cui
|
||||
JOIN firms.entities e ON e.cui = b.cui
|
||||
)
|
||||
SELECT 'firme-triplu-pipe-public', rn::smallint,
|
||||
COALESCE(name, 'CUI ' || cui),
|
||||
'/achizitii/firma/' || cui,
|
||||
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
|
||||
public_kpi.fmt_ron(total_combined) || ' RON',
|
||||
total_combined,
|
||||
'EU privat: ' || anunturi_eu || ' / ' || public_kpi.fmt_ron(COALESCE(buget_eu,0)) || ' · SEAP: ' || contracte || ' / ' || public_kpi.fmt_ron(COALESCE(seap_lei,0)) || ' · AFIR: ' || plati || ' / ' || public_kpi.fmt_ron(COALESCE(afir_lei,0)) || ' RON',
|
||||
'🔱 triplu pipe', 'risk'
|
||||
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── firme-datornice-cu-contracte-seap ─────
|
||||
INSERT INTO public_kpi.red_flags_previews
|
||||
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
|
||||
WITH agg AS (
|
||||
SELECT d.cui, d.name,
|
||||
MIN(d.publication_date) AS pub_date,
|
||||
MIN(d.period_label) AS period_label,
|
||||
MAX(d.debt_total) AS debt_total,
|
||||
MAX(d.debtor_category) AS debtor_category,
|
||||
COUNT(DISTINCT a.id) AS contracte,
|
||||
SUM(a.awarded_value)::numeric AS contracte_lei,
|
||||
MAX(a.publication_date::date) AS ultim_contract
|
||||
FROM anaf.datornici d
|
||||
JOIN seap.announcements a
|
||||
ON a.supplier_cui = d.cui
|
||||
AND a.publication_date::date > d.publication_date
|
||||
WHERE a.awarded_value IS NOT NULL AND a.awarded_value > 0
|
||||
GROUP BY d.cui, d.name
|
||||
HAVING SUM(a.awarded_value) > 100000
|
||||
),
|
||||
ranked AS (
|
||||
SELECT agg.*, e.adr_judet AS judet,
|
||||
ROW_NUMBER() OVER (ORDER BY contracte_lei DESC NULLS LAST) AS rn
|
||||
FROM agg LEFT JOIN firms.entities e ON e.cui = agg.cui
|
||||
)
|
||||
SELECT 'firme-datornice-cu-contracte-seap', rn::smallint,
|
||||
COALESCE(name, 'CUI ' || cui),
|
||||
'/achizitii/firma/' || cui,
|
||||
'CUI ' || cui || CASE WHEN judet IS NOT NULL THEN ' · ' || judet ELSE '' END || ' · ' || COALESCE(period_label, 'T1 2016'),
|
||||
public_kpi.fmt_ron(contracte_lei) || ' RON',
|
||||
contracte_lei,
|
||||
'Datorie ' || public_kpi.fmt_ron(COALESCE(debt_total,0)) || ' RON (' || COALESCE(debtor_category::text, '') || ') · ' || contracte || ' contracte · ultim ' || COALESCE(ultim_contract::text, '?'),
|
||||
'🚨 datornic + contract', 'risk'
|
||||
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── donatori-care-au-castigat-seap ─────
|
||||
INSERT INTO public_kpi.red_flags_previews
|
||||
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
|
||||
WITH don AS (
|
||||
SELECT donator_cui AS cui,
|
||||
MIN(donator_nume) AS donator,
|
||||
COUNT(*) AS nr_donatii,
|
||||
SUM(suma_lei)::numeric AS total_donatii,
|
||||
array_agg(DISTINCT partid_id ORDER BY partid_id) AS partide,
|
||||
MIN(an) AS prima, MAX(an) AS ultima
|
||||
FROM aep.donatii_pj
|
||||
WHERE donator_cui IS NOT NULL
|
||||
GROUP BY donator_cui
|
||||
),
|
||||
seap AS (
|
||||
SELECT supplier_cui AS cui,
|
||||
COUNT(*) AS contracte,
|
||||
SUM(awarded_value)::numeric AS contracte_lei,
|
||||
COUNT(DISTINCT authority_cui) AS autoritati
|
||||
FROM seap.announcements
|
||||
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
|
||||
GROUP BY supplier_cui
|
||||
),
|
||||
ranked AS (
|
||||
SELECT d.cui, COALESCE(e.name, d.donator) AS firma, e.adr_judet AS judet,
|
||||
d.nr_donatii, d.total_donatii, d.partide, d.prima, d.ultima,
|
||||
s.contracte, s.contracte_lei, s.autoritati,
|
||||
ROW_NUMBER() OVER (ORDER BY s.contracte_lei DESC NULLS LAST) AS rn
|
||||
FROM don d
|
||||
JOIN seap s ON s.cui = d.cui
|
||||
LEFT JOIN firms.entities e ON e.cui = d.cui
|
||||
WHERE s.contracte_lei > 100000
|
||||
)
|
||||
SELECT 'donatori-care-au-castigat-seap', rn::smallint,
|
||||
COALESCE(firma, 'CUI ' || cui),
|
||||
'/achizitii/firma/' || cui,
|
||||
'CUI ' || cui || CASE WHEN judet IS NOT NULL THEN ' · ' || judet ELSE '' END || ' · partide: ' || array_to_string(COALESCE(partide, ARRAY[]::text[]), ', '),
|
||||
public_kpi.fmt_ron(contracte_lei) || ' RON',
|
||||
contracte_lei,
|
||||
nr_donatii || ' donații (' || public_kpi.fmt_ron(COALESCE(total_donatii,0)) || ' RON, ' || prima || '-' || ultima || ') · ' || contracte || ' contracte la ' || autoritati || ' autorități',
|
||||
'🗳️ donator + furnizor', 'warn'
|
||||
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── donatori-politici-care-datoreaza-statului ─────
|
||||
INSERT INTO public_kpi.red_flags_previews
|
||||
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
|
||||
WITH aep_agg AS (
|
||||
SELECT donator_cui AS cui,
|
||||
SUM(suma_lei)::numeric AS total_donat,
|
||||
array_agg(DISTINCT partid_id ORDER BY partid_id) FILTER (WHERE partid_id IS NOT NULL) AS partide,
|
||||
COUNT(*) AS nr_donatii,
|
||||
MIN(an) AS prima_an, MAX(an) AS ultima_an
|
||||
FROM aep.donatii_pj
|
||||
WHERE donator_cui IS NOT NULL
|
||||
GROUP BY donator_cui
|
||||
),
|
||||
seap_supplier AS (
|
||||
SELECT supplier_cui AS cui,
|
||||
COUNT(*) AS contracte,
|
||||
SUM(awarded_value)::numeric AS valoare_seap
|
||||
FROM seap.announcements
|
||||
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
|
||||
GROUP BY supplier_cui
|
||||
),
|
||||
ranked AS (
|
||||
SELECT d.cui, d.period_label, d.debt_total::numeric AS debt_total,
|
||||
a.total_donat, a.partide, a.nr_donatii, a.prima_an, a.ultima_an,
|
||||
e.name, e.adr_judet,
|
||||
COALESCE(s.contracte, 0) AS contracte_seap,
|
||||
COALESCE(s.valoare_seap, 0)::numeric AS valoare_seap,
|
||||
ROW_NUMBER() OVER (ORDER BY d.debt_total DESC NULLS LAST) AS rn
|
||||
FROM aep_agg a
|
||||
JOIN anaf.datornici_latest d ON d.cui = a.cui
|
||||
LEFT JOIN firms.entities e ON e.cui = a.cui
|
||||
LEFT JOIN seap_supplier s ON s.cui = a.cui
|
||||
)
|
||||
SELECT 'donatori-politici-care-datoreaza-statului', rn::smallint,
|
||||
COALESCE(name, 'CUI ' || cui),
|
||||
'/achizitii/firma/' || cui,
|
||||
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END || ' · partide: ' || array_to_string(COALESCE(partide, ARRAY[]::text[]), ', ') || ' (' || prima_an || '–' || ultima_an || ')',
|
||||
public_kpi.fmt_ron(debt_total) || ' RON datorie',
|
||||
debt_total,
|
||||
'🗳️ donat ' || public_kpi.fmt_ron(COALESCE(total_donat,0)) || ' RON · 🚨 datornic ' || COALESCE(period_label,'') ||
|
||||
CASE WHEN contracte_seap > 0 THEN ' · 📜 ' || contracte_seap || ' contracte SEAP (' || public_kpi.fmt_ron(valoare_seap) || ' RON)' ELSE '' END,
|
||||
'🚨 donator + datornic', 'risk'
|
||||
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── donatori-politici-care-contesta-la-cnsc ─────
-- Preview rows for donor CUIs from aep.donatii_pj that also have a row in
-- cnsc.mv_per_contestator_cui. Ranked by (total donated × contestations
-- filed); only the first 5 are stored. SEAP contract totals are appended to
-- the detail text when the CUI has awarded contracts.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH aep_agg AS (
    -- One row per donor CUI: donated total, distinct party ids, MAX(an).
    SELECT donator_cui AS cui,
           SUM(suma_lei)::numeric AS total_donat,
           array_agg(DISTINCT partid_id ORDER BY partid_id)
               FILTER (WHERE partid_id IS NOT NULL) AS partide,
           MAX(an) AS ultima_an
    FROM aep.donatii_pj
    WHERE donator_cui IS NOT NULL
    GROUP BY donator_cui
),
seap AS (
    -- Awarded SEAP contracts per supplier CUI (positive values only).
    SELECT supplier_cui AS cui,
           COUNT(*) AS contracte,
           SUM(awarded_value)::numeric AS valoare_seap
    FROM seap.announcements
    WHERE supplier_cui IS NOT NULL AND awarded_value > 0
    GROUP BY supplier_cui
),
ranked AS (
    SELECT a.cui, e.name, e.adr_judet,
           a.total_donat, a.partide, a.ultima_an,
           c.contestations_filed,
           COALESCE(s.contracte, 0) AS contracte_seap,
           COALESCE(s.valoare_seap, 0)::numeric AS valoare_seap,
           ROW_NUMBER() OVER (
               ORDER BY (a.total_donat * c.contestations_filed) DESC NULLS LAST,
                        a.cui  -- tie-breaker so equal scores rank the same on every refresh
           ) AS rn
    FROM aep_agg a
    JOIN cnsc.mv_per_contestator_cui c ON c.cui = a.cui
    LEFT JOIN firms.entities e ON e.cui = a.cui
    LEFT JOIN seap s ON s.cui = a.cui
)
SELECT 'donatori-politici-care-contesta-la-cnsc', rn::smallint,
       COALESCE(name, 'CUI ' || cui),
       '/achizitii/firma/' || cui,
       -- ultima_an is COALESCEd: a NULL operand would turn the whole
       -- concatenation (and therefore `secondary`) into NULL.
       'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END || ' · partide: ' || array_to_string(COALESCE(partide, ARRAY[]::text[]), ', ') || ' (ultima ' || COALESCE(ultima_an::text, '?') || ')',
       public_kpi.fmt_ron(total_donat) || ' × ' || contestations_filed,
       (total_donat * contestations_filed)::numeric,
       '🗳️ donat ' || public_kpi.fmt_ron(total_donat) || ' RON · ⚖️ ' || contestations_filed || ' contestații CNSC' ||
           CASE WHEN contracte_seap > 0 THEN ' · 📜 ' || contracte_seap || ' contracte SEAP (' || public_kpi.fmt_ron(valoare_seap) || ' RON)' ELSE '' END,
       '🗳️⚖️ donator + contestator', 'risk'
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── energie-licentiati-anre-datornici-anaf ─────
-- Preview rows for CUIs with nr_active > 0 in anre.mv_licente_per_cui that
-- also have a row in anaf.datornici_latest. The five largest debt_total
-- values are stored; SEAP contract totals are appended when present.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH supplier_totals AS (
    -- Awarded SEAP contracts per supplier CUI (positive values only).
    SELECT ann.supplier_cui AS cui,
           COUNT(*) AS contracte,
           SUM(ann.awarded_value)::numeric AS valoare_seap
    FROM seap.announcements AS ann
    WHERE ann.supplier_cui IS NOT NULL
      AND ann.awarded_value > 0
    GROUP BY ann.supplier_cui
),
top_debtors AS (
    SELECT lic.cui,
           ent.name,
           ent.adr_judet,
           lic.nr_active,
           lic.nr_expirate,
           lic.surse,
           debt.debt_total::numeric AS debt_total,
           debt.period_label,
           COALESCE(st.contracte, 0) AS contracte_seap,
           COALESCE(st.valoare_seap, 0)::numeric AS valoare_seap,
           ROW_NUMBER() OVER (ORDER BY debt.debt_total DESC NULLS LAST) AS rn
    FROM anre.mv_licente_per_cui AS lic
    INNER JOIN anaf.datornici_latest AS debt ON debt.cui = lic.cui
    LEFT JOIN firms.entities AS ent ON ent.cui = lic.cui
    LEFT JOIN supplier_totals AS st ON st.cui = lic.cui
    WHERE lic.nr_active > 0
)
SELECT 'energie-licentiati-anre-datornici-anaf',
       t.rn::smallint,
       COALESCE(t.name, 'CUI ' || t.cui),
       '/achizitii/firma/' || t.cui,
       -- COALESCE(' · ' || x, '') yields '' when x is NULL, same as the CASE form.
       'CUI ' || t.cui
           || COALESCE(' · ' || t.adr_judet, '')
           || ' · 🔌 ' || t.nr_active || ' licențe active ('
           || array_to_string(COALESCE(t.surse, ARRAY[]::text[]), '/') || ')'
           || CASE WHEN t.nr_expirate > 0 THEN ', ' || t.nr_expirate || ' expirate' ELSE '' END,
       public_kpi.fmt_ron(t.debt_total) || ' RON datorie',
       t.debt_total,
       '🚨 datornic ' || COALESCE(t.period_label, '')
           || CASE WHEN t.contracte_seap > 0
                   THEN ' · 📜 ' || t.contracte_seap || ' contracte SEAP (' || public_kpi.fmt_ron(t.valoare_seap) || ' RON)'
                   ELSE ''
              END,
       '🚨 ANRE + datornic',
       'risk'
FROM top_debtors AS t
WHERE t.rn <= 5;
|
||||
|
||||
-- ───── autoritati-dubla-alerta-cdc-cnsc ─────
-- Preview rows for authority CUIs that have BOTH:
--   * at least 2 rows in curteacont.rapoarte published in the last 5 years, and
--   * contestation_count >= 3 in cnsc.mv_per_authority_cui.
-- Ranked by audits + contestations, then contestations; first 5 are stored.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH cdc AS (
    SELECT audited_entity_cui AS cui,
           COUNT(*) AS audit_count,
           array_agg(DISTINCT audit_type) FILTER (WHERE audit_type IS NOT NULL) AS audit_types
    FROM curteacont.rapoarte
    WHERE audited_entity_cui IS NOT NULL
      AND publication_date >= now() - interval '5 years'
    GROUP BY audited_entity_cui
    HAVING COUNT(*) >= 2
),
cnsc_ AS (
    SELECT cui, contestation_count
    FROM cnsc.mv_per_authority_cui
    WHERE contestation_count >= 3
),
seap AS (
    -- Procedures per authority; value falls back to the estimate when nothing was awarded.
    SELECT authority_cui AS cui,
           COUNT(*) AS proceduri,
           SUM(COALESCE(awarded_value, estimated_value))::numeric AS valoare
    FROM seap.announcements
    WHERE authority_cui IS NOT NULL
    GROUP BY authority_cui
),
ranked AS (
    SELECT a.cui, e.name, e.adr_judet,
           a.audit_count, a.audit_types,
           c.contestation_count,
           (a.audit_count + c.contestation_count) AS total_signals,
           COALESCE(s.proceduri, 0) AS proceduri,
           COALESCE(s.valoare, 0)::numeric AS valoare,
           ROW_NUMBER() OVER (
               ORDER BY (a.audit_count + c.contestation_count) DESC,
                        c.contestation_count DESC,
                        a.cui  -- final tie-breaker: small integer scores tie often
           ) AS rn
    FROM cdc a
    JOIN cnsc_ c ON c.cui = a.cui
    LEFT JOIN firms.entities e ON e.cui = a.cui
    LEFT JOIN seap s ON s.cui = a.cui
)
SELECT 'autoritati-dubla-alerta-cdc-cnsc', rn::smallint,
       COALESCE(name, 'CUI ' || cui),
       '/achizitii/autoritate/' || cui,
       'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
       total_signals || ' semnale',
       total_signals::numeric,
       '📋 ' || audit_count || ' audituri (' || array_to_string(COALESCE(audit_types, ARRAY[]::text[]), '/') || ') · ⚖️ ' || contestation_count || ' contestații CNSC · ' || proceduri || ' proceduri SEAP · ' || public_kpi.fmt_ron(COALESCE(valoare,0)) || ' RON',
       '🚨 dublă alertă', 'risk'
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── autoritati-contestate-cnsc ─────
-- Preview rows for the 5 authority CUIs with the highest contestation_count
-- in cnsc.mv_per_authority_cui, with their SEAP procedure count / awarded
-- value and the first–last contestation years in the detail text.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH seap AS (
    -- Procedures and awarded value per authority CUI.
    SELECT authority_cui AS cui,
           COUNT(*) AS proceduri,
           SUM(awarded_value)::numeric AS valoare
    FROM seap.announcements
    WHERE authority_cui IS NOT NULL
    GROUP BY authority_cui
),
ranked AS (
    SELECT m.cui, m.contestation_count,
           m.first_contestation_date, m.last_contestation_date,
           e.name, e.adr_judet,
           COALESCE(s.proceduri, 0) AS proceduri,
           COALESCE(s.valoare, 0)::numeric AS valoare,
           ROW_NUMBER() OVER (
               ORDER BY m.contestation_count DESC,
                        m.cui  -- tie-breaker: equal counts must not reshuffle the top 5 between refreshes
           ) AS rn
    FROM cnsc.mv_per_authority_cui m
    LEFT JOIN firms.entities e ON e.cui = m.cui
    LEFT JOIN seap s ON s.cui = m.cui
)
SELECT 'autoritati-contestate-cnsc', rn::smallint,
       COALESCE(name, 'CUI ' || cui),
       '/achizitii/autoritate/' || cui,
       'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
       contestation_count || ' contestații',
       contestation_count::numeric,
       proceduri || ' proceduri SEAP · ' || public_kpi.fmt_ron(COALESCE(valoare,0)) || ' RON · contestații ' ||
           COALESCE(extract(year FROM first_contestation_date)::text, '?') || '–' ||
           COALESCE(extract(year FROM last_contestation_date)::text, '?'),
       '⚖️ CNSC', 'warn'
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── energie-fara-licenta ─────
-- Preview rows for suppliers with awarded SEAP contracts whose cpv_code
-- starts with '09' and that have no row in anre.mv_licente_per_cui.
-- Only suppliers present in firms.entities are kept; top 5 by awarded value.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH energy_suppliers AS (
    SELECT ann.supplier_cui,
           COUNT(*) AS contracte,
           SUM(ann.awarded_value)::numeric AS valoare,
           COUNT(DISTINCT ann.authority_cui) AS autoritati,
           array_agg(DISTINCT substr(ann.cpv_code, 1, 4)) AS cpv_prefs
    FROM seap.announcements AS ann
    WHERE ann.supplier_cui IS NOT NULL
      AND ann.awarded_value > 0
      AND ann.cpv_code LIKE '09%'
    GROUP BY ann.supplier_cui
),
unlicensed AS (
    SELECT es.supplier_cui AS cui,
           ent.name,
           ent.adr_judet,
           es.contracte,
           es.valoare,
           es.autoritati,
           es.cpv_prefs,
           ROW_NUMBER() OVER (ORDER BY es.valoare DESC NULLS LAST) AS rn
    FROM energy_suppliers AS es
    INNER JOIN firms.entities AS ent ON ent.cui = es.supplier_cui
    -- Anti-join: keep only suppliers with no matching row in the ANRE view.
    WHERE NOT EXISTS (
        SELECT 1
        FROM anre.mv_licente_per_cui AS lic
        WHERE lic.cui = es.supplier_cui
    )
)
SELECT 'energie-fara-licenta',
       u.rn::smallint,
       COALESCE(u.name, 'CUI ' || u.cui),
       '/achizitii/firma/' || u.cui,
       'CUI ' || u.cui || COALESCE(' · ' || u.adr_judet, ''),
       public_kpi.fmt_ron(u.valoare) || ' RON',
       u.valoare,
       -- At most the first five 4-character CPV prefixes are listed.
       u.contracte || ' contracte · ' || u.autoritati || ' autorități · CPV '
           || array_to_string((COALESCE(u.cpv_prefs, ARRAY[]::text[]))[1:5], ', '),
       'fără licență ANRE',
       'risk'
FROM unlicensed AS u
WHERE u.rn <= 5;
|
||||
|
||||
-- ───── telco-fara-licenta ─────
-- Preview rows for suppliers with awarded SEAP contracts whose cpv_code
-- starts with '64' and that have no row in ancom.mv_operatori_per_cui.
-- Only suppliers present in firms.entities are kept; top 5 by awarded value.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH telco_suppliers AS (
    SELECT ann.supplier_cui,
           COUNT(*) AS contracte,
           SUM(ann.awarded_value)::numeric AS valoare,
           COUNT(DISTINCT ann.authority_cui) AS autoritati,
           array_agg(DISTINCT substr(ann.cpv_code, 1, 4)) AS cpv_prefs
    FROM seap.announcements AS ann
    WHERE ann.supplier_cui IS NOT NULL
      AND ann.awarded_value > 0
      AND ann.cpv_code LIKE '64%'
    GROUP BY ann.supplier_cui
),
unauthorised AS (
    SELECT ts.supplier_cui AS cui,
           ent.name,
           ent.adr_judet,
           ts.contracte,
           ts.valoare,
           ts.autoritati,
           ts.cpv_prefs,
           ROW_NUMBER() OVER (ORDER BY ts.valoare DESC NULLS LAST) AS rn
    FROM telco_suppliers AS ts
    INNER JOIN firms.entities AS ent ON ent.cui = ts.supplier_cui
    -- Anti-join: keep only suppliers with no matching row in the ANCOM view.
    WHERE NOT EXISTS (
        SELECT 1
        FROM ancom.mv_operatori_per_cui AS op
        WHERE op.cui = ts.supplier_cui
    )
)
SELECT 'telco-fara-licenta',
       u.rn::smallint,
       COALESCE(u.name, 'CUI ' || u.cui),
       '/achizitii/firma/' || u.cui,
       'CUI ' || u.cui || COALESCE(' · ' || u.adr_judet, ''),
       public_kpi.fmt_ron(u.valoare) || ' RON',
       u.valoare,
       -- At most the first five 4-character CPV prefixes are listed.
       u.contracte || ' contracte · ' || u.autoritati || ' autorități · CPV '
           || array_to_string((COALESCE(u.cpv_prefs, ARRAY[]::text[]))[1:5], ', '),
       'fără autorizare ANCOM',
       'risk'
FROM unauthorised AS u
WHERE u.rn <= 5;
|
||||
|
||||
-- ───── stat-actionar-seap ─────
-- Preview rows for CUIs present in aaas.mv_per_cui that also have awarded
-- SEAP contracts as supplier. Top 5 by total awarded value.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH seap AS (
    -- Awarded SEAP contracts per supplier CUI (positive values only).
    SELECT supplier_cui AS cui,
           COUNT(*) AS contracte,
           SUM(awarded_value)::numeric AS total_lei,
           COUNT(DISTINCT authority_cui) AS autoritati
    FROM seap.announcements
    WHERE supplier_cui IS NOT NULL AND awarded_value > 0
    GROUP BY supplier_cui
),
ranked AS (
    SELECT a.cui,
           COALESCE(e.name, f.name) AS firma,
           e.adr_judet AS judet,
           a.max_state_share_pct,
           a.total_debt_to_state_lei,
           a.statusuri,
           s.contracte, s.total_lei, s.autoritati,
           ROW_NUMBER() OVER (
               ORDER BY s.total_lei DESC NULLS LAST,
                        a.cui  -- tie-breaker for a reproducible top 5
           ) AS rn
    FROM aaas.mv_per_cui a
    JOIN seap s ON s.cui = a.cui
    LEFT JOIN firms.entities e ON e.cui = a.cui
    -- Name fallback only. Fetched as at most ONE row per CUI so the join can
    -- never multiply a firm in the ranking.
    -- NOTE(review): assumes aaas.firme may hold several rows per CUI (the
    -- per-CUI view exposes max/array aggregates) — confirm against the schema.
    LEFT JOIN LATERAL (
        SELECT af.name
        FROM aaas.firme af
        WHERE af.cui = a.cui
          AND af.name IS NOT NULL
        ORDER BY af.name
        LIMIT 1
    ) f ON true
)
SELECT 'stat-actionar-seap', rn::smallint,
       COALESCE(firma, 'CUI ' || cui),
       '/achizitii/firma/' || cui,
       'CUI ' || cui || CASE WHEN judet IS NOT NULL THEN ' · ' || judet ELSE '' END || ' · stat ' || CASE WHEN max_state_share_pct IS NOT NULL THEN to_char(max_state_share_pct, 'FM999990.0') || '%' ELSE '?' END || ' · ' || array_to_string(COALESCE(statusuri, ARRAY[]::text[]), ', '),
       public_kpi.fmt_ron(total_lei) || ' RON',
       total_lei,
       contracte || ' contracte la ' || autoritati || ' autorități' ||
           CASE WHEN total_debt_to_state_lei IS NOT NULL AND total_debt_to_state_lei > 0
                THEN ' · datorii la stat ' || public_kpi.fmt_ron(total_debt_to_state_lei) || ' RON'
                ELSE '' END,
       '🏛️ stat→stat', 'risk'
FROM ranked WHERE rn <= 5;
|
||||
|
||||
-- ───── firme-cu-ajutor-de-stat-si-seap ─────
-- Preview rows for CUIs that appear in regas.ajutoare AND as SEAP suppliers
-- with awarded contracts AND in firms.entities. Ranked by the sum of both
-- amounts; rows whose combined amount is not positive are excluded.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH aid_totals AS (
    SELECT aj.cui,
           COUNT(*) AS nr_ajutoare,
           SUM(aj.ajutor_acordat_subcategorie) AS total_ajutor,
           COUNT(DISTINCT aj.id_masura) AS nr_masuri,
           COUNT(DISTINCT aj.finantator) AS nr_finantatori
    FROM regas.ajutoare AS aj
    WHERE aj.cui IS NOT NULL
    GROUP BY aj.cui
),
contract_totals AS (
    SELECT ann.supplier_cui AS cui,
           COUNT(*) AS contracte_seap,
           SUM(ann.awarded_value) AS valoare_seap,
           COUNT(DISTINCT ann.authority_cui) AS autoritati
    FROM seap.announcements AS ann
    WHERE ann.supplier_cui IS NOT NULL
      AND ann.awarded_value > 0
    GROUP BY ann.supplier_cui
),
combined AS (
    -- raw_total keeps the un-cast sum so filtering and ranking use exactly
    -- the same expression as the stored metric before its numeric cast.
    SELECT aid.cui,
           ent.name,
           ent.adr_judet,
           aid.nr_ajutoare,
           aid.total_ajutor::numeric AS total_ajutor,
           aid.nr_finantatori,
           ct.contracte_seap,
           ct.valoare_seap::numeric AS valoare_seap,
           ct.autoritati,
           COALESCE(aid.total_ajutor, 0) + COALESCE(ct.valoare_seap, 0) AS raw_total
    FROM aid_totals AS aid
    INNER JOIN contract_totals AS ct ON ct.cui = aid.cui
    INNER JOIN firms.entities AS ent ON ent.cui = aid.cui
),
top_combined AS (
    SELECT cb.cui,
           cb.name,
           cb.adr_judet,
           cb.nr_ajutoare,
           cb.total_ajutor,
           cb.nr_finantatori,
           cb.contracte_seap,
           cb.valoare_seap,
           cb.autoritati,
           cb.raw_total::numeric AS total_combined,
           ROW_NUMBER() OVER (ORDER BY cb.raw_total DESC NULLS LAST) AS rn
    FROM combined AS cb
    WHERE cb.raw_total > 0
)
SELECT 'firme-cu-ajutor-de-stat-si-seap',
       tc.rn::smallint,
       COALESCE(tc.name, 'CUI ' || tc.cui),
       '/achizitii/firma/' || tc.cui,
       'CUI ' || tc.cui || COALESCE(' · ' || tc.adr_judet, ''),
       public_kpi.fmt_ron(tc.total_combined) || ' RON',
       tc.total_combined,
       'Ajutor stat: ' || tc.nr_ajutoare || ' ajutoare / '
           || public_kpi.fmt_ron(COALESCE(tc.total_ajutor,0)) || ' RON la '
           || tc.nr_finantatori || ' finanțatori · SEAP: '
           || tc.contracte_seap || ' contracte / '
           || public_kpi.fmt_ron(COALESCE(tc.valoare_seap,0)) || ' RON la '
           || tc.autoritati || ' autorități',
       'Ajutor + SEAP',
       'warn'
FROM top_combined AS tc
WHERE tc.rn <= 5;
|
||||
|
||||
-- ───── firme-cu-fonduri-eu-si-seap ─────
-- Preview rows for CUIs that appear in fonduri.beneficiar_anunt AND as SEAP
-- suppliers with awarded contracts AND in firms.entities. Ranked by the sum
-- of lot budgets and awarded SEAP value; non-positive totals are excluded.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH ben_amount AS (
    SELECT b.cui,
           -- The LEFT JOIN yields one row per (announcement, lot), so COUNT(*)
           -- would count lots. The text below reports announcements, hence
           -- DISTINCT on the announcement id.
           COUNT(DISTINCT b.id) AS anunturi_eu,
           SUM(l.buget_lei) AS buget_eu
    FROM fonduri.beneficiar_anunt b
    LEFT JOIN fonduri.beneficiar_anunt_lot l ON l.anunt_id = b.id
    WHERE b.cui IS NOT NULL
    GROUP BY b.cui
),
seap_amount AS (
    -- Awarded SEAP contracts per supplier CUI (positive values only).
    SELECT supplier_cui AS cui,
           COUNT(*) AS contracte_seap,
           SUM(awarded_value) AS valoare_seap,
           COUNT(DISTINCT authority_cui) AS autoritati
    FROM seap.announcements
    WHERE supplier_cui IS NOT NULL AND awarded_value > 0
    GROUP BY supplier_cui
),
ranked AS (
    SELECT b.cui, e.name, e.adr_judet,
           b.anunturi_eu, b.buget_eu::numeric AS buget_eu,
           s.contracte_seap, s.valoare_seap::numeric AS valoare_seap, s.autoritati,
           (COALESCE(b.buget_eu,0) + COALESCE(s.valoare_seap,0))::numeric AS total_combined,
           ROW_NUMBER() OVER (
               ORDER BY (COALESCE(b.buget_eu,0) + COALESCE(s.valoare_seap,0)) DESC NULLS LAST,
                        b.cui  -- tie-breaker for a reproducible top 5
           ) AS rn
    FROM ben_amount b
    JOIN seap_amount s ON s.cui = b.cui
    JOIN firms.entities e ON e.cui = b.cui
    WHERE COALESCE(b.buget_eu,0) + COALESCE(s.valoare_seap,0) > 0
)
SELECT 'firme-cu-fonduri-eu-si-seap', rn::smallint,
       COALESCE(name, 'CUI ' || cui),
       '/achizitii/firma/' || cui,
       'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
       public_kpi.fmt_ron(total_combined) || ' RON',
       total_combined,
       'EU: ' || anunturi_eu || ' anunțuri / ' || public_kpi.fmt_ron(COALESCE(buget_eu,0)) || ' RON · SEAP: ' || contracte_seap || ' contracte / ' || public_kpi.fmt_ron(COALESCE(valoare_seap,0)) || ' RON la ' || autoritati || ' autorități',
       'EU + SEAP', 'warn'
FROM ranked WHERE rn <= 5;
|
||||
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Initial populate: run the refresh once as part of this migration.
SELECT public_kpi.refresh_red_flags_previews();

-- Sanity check: number of preview rows stored for each slug.
SELECT p.slug,
       count(*) AS rows
FROM public_kpi.red_flags_previews AS p
GROUP BY p.slug
ORDER BY p.slug;
|
||||
@@ -0,0 +1,22 @@
|
||||
-- 045: composite index on seap.announcements (publication_date DESC NULLS LAST, id DESC)
|
||||
--
|
||||
-- Problem: /achizitii/cauta with no filters (home browse) was slow (~3s on
|
||||
-- empty q). The page does `ORDER BY a.publication_date DESC NULLS LAST,
|
||||
-- a.id DESC LIMIT 30`, which couldn't use the existing
|
||||
-- `idx_ann_pub_date btree (publication_date)` because:
|
||||
-- - Default btree is ASC NULLS LAST (DESC NULLS LAST mismatch for nulls)
|
||||
-- - Secondary sort `id DESC` requires composite
|
||||
-- Planner fell back to Seq Scan + top-N sort over 781K rows (~1.5s just
|
||||
-- for the main result query, plus seq-scan in facet aggregates).
|
||||
--
|
||||
-- Fix: composite index matching the exact ORDER BY clause direction.
|
||||
-- Now Index Only Scan + early LIMIT termination → ~1ms.
|
||||
--
|
||||
-- Measurements (production, 781K rows):
|
||||
-- Before: 1543ms main query, 550ms count, ~3s total /cauta wall time
|
||||
-- After: 0.7ms main query, ~5ms with LEFT JOIN to cpv_codes/cui_location
|
||||
--
|
||||
-- Idempotent: IF NOT EXISTS; CONCURRENTLY to avoid table lock.
|
||||
|
||||
-- B-tree whose key order matches
--   ORDER BY publication_date DESC NULLS LAST, id DESC
-- CONCURRENTLY cannot run inside a transaction block, so this statement must
-- stay outside any BEGIN/COMMIT.
-- NOTE: a failed concurrent build leaves an INVALID index behind, which
-- IF NOT EXISTS then skips; drop it before re-running.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_ann_pub_date_desc_nl
    ON seap.announcements
    USING btree (publication_date DESC NULLS LAST, id DESC);
|
||||
@@ -0,0 +1,140 @@
|
||||
-- 046: snapshot of /achizitii/cauta no-filter facet aggregates.
|
||||
--
|
||||
-- Problem: /cauta home (no filters) still ~1.9s after sql/045's pub_date
|
||||
-- index fix. Main query is now ~5ms but 6 parallel facet aggregates each
|
||||
-- do full-table scans:
|
||||
-- - count(*) GROUP BY type (~200ms)
|
||||
-- - count(*) GROUP BY county_code (~200ms)
|
||||
-- - count(*) GROUP BY cpv_division (~200ms)
|
||||
-- - count(*) GROUP BY procedure_type (~200ms)
|
||||
-- - count(*) GROUP BY procedure_state (~200ms)
|
||||
-- - count(*) WHERE awarded_value bucket (~200ms)
|
||||
--
|
||||
-- Fix: materialize a single snapshot table holding all default-facet
|
||||
-- counts. Search code short-circuits to read from snapshot when filters
|
||||
-- are empty.
|
||||
--
|
||||
-- Wins only the no-filter case; any active filter still does live
|
||||
-- aggregates. That's intentional: filter combinations are exponentially
|
||||
-- many (cannot pre-materialize) and selective filters keep aggregates
|
||||
-- fast anyway.
|
||||
|
||||
BEGIN;

-- Snapshot of facet counts; one row per (facet_name, key).
CREATE TABLE IF NOT EXISTS public_kpi.cauta_default_facets (
    facet_name  text                     NOT NULL,
    key         text                     NOT NULL,
    label       text,
    emoji       text,
    count       bigint                   NOT NULL,
    sort_order  integer                  NOT NULL DEFAULT 0,
    computed_at timestamp with time zone NOT NULL DEFAULT now(),
    -- Same name PostgreSQL assigns to an unnamed primary key on this table.
    CONSTRAINT cauta_default_facets_pkey PRIMARY KEY (facet_name, key)
);
|
||||
|
||||
-- Single-row table: the CHECK pins id to 1, so the primary key allows at
-- most one row.
CREATE TABLE IF NOT EXISTS public_kpi.cauta_default_totals (
    id          smallint                 DEFAULT 1,
    total       bigint                   NOT NULL,
    sum_awarded numeric                  NOT NULL,
    computed_at timestamp with time zone NOT NULL DEFAULT now(),
    -- Same names PostgreSQL assigns to the unnamed inline constraints.
    CONSTRAINT cauta_default_totals_pkey PRIMARY KEY (id),
    CONSTRAINT cauta_default_totals_id_check CHECK (id = 1)
);
|
||||
|
||||
-- Rebuilds the snapshot tables public_kpi.cauta_default_totals and
-- public_kpi.cauta_default_facets from seap.announcements.
-- Takes no arguments and returns nothing. It upserts the single totals row,
-- then deletes and re-inserts every facet row.
CREATE OR REPLACE FUNCTION public_kpi.refresh_cauta_defaults()
RETURNS void
LANGUAGE plpgsql
AS $$
BEGIN
    -- Totals (single row, idempotent UPSERT).
    INSERT INTO public_kpi.cauta_default_totals (id, total, sum_awarded, computed_at)
    SELECT 1, count(*), COALESCE(sum(awarded_value), 0), now()
    FROM seap.announcements
    ON CONFLICT (id) DO UPDATE SET
        total       = EXCLUDED.total,
        sum_awarded = EXCLUDED.sum_awarded,
        computed_at = EXCLUDED.computed_at;

    -- Intentionally unfiltered: wipe the facet table and re-populate it. The
    -- DELETE and the INSERTs below run in the caller's transaction, so
    -- concurrent readers keep seeing the previous snapshot until it commits.
    DELETE FROM public_kpi.cauta_default_facets;

    -- In every top-N facet below, the facet key is a secondary sort in BOTH
    -- the window ORDER BY and the statement ORDER BY, so sort_order and the
    -- LIMIT cut-off agree even when counts tie.

    -- types (top 12). NULL types are skipped like in the other facets,
    -- because cauta_default_facets.key is NOT NULL.
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'type', type, type, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, type)
    FROM seap.announcements
    WHERE type IS NOT NULL
    GROUP BY type
    ORDER BY count(*) DESC, type
    LIMIT 12;

    -- counties (top 20)
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'county', county_code, county_code, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, county_code)
    FROM seap.announcements
    WHERE county_code IS NOT NULL
    GROUP BY county_code
    ORDER BY count(*) DESC, county_code
    LIMIT 20;

    -- cpv divisions (top 15, with label + emoji from cpv_codes)
    -- NOTE(review): assumes seap.cpv_codes.code is unique; a duplicated code
    -- would produce two rows for one key and violate the primary key.
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, emoji, count, sort_order)
    SELECT 'cpv', a.cpv_division, c.name_ro, c.emoji, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, a.cpv_division)
    FROM seap.announcements a
    LEFT JOIN seap.cpv_codes c ON c.code = a.cpv_division
    WHERE a.cpv_division IS NOT NULL
    GROUP BY a.cpv_division, c.name_ro, c.emoji
    ORDER BY count(*) DESC, a.cpv_division
    LIMIT 15;

    -- procedure types (top 10)
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'procedure', procedure_type, procedure_type, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, procedure_type)
    FROM seap.announcements
    WHERE procedure_type IS NOT NULL
    GROUP BY procedure_type
    ORDER BY count(*) DESC, procedure_type
    LIMIT 10;

    -- procedure states (top 8)
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'state', procedure_state, procedure_state, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, procedure_state)
    FROM seap.announcements
    WHERE procedure_state IS NOT NULL
    GROUP BY procedure_state
    ORDER BY count(*) DESC, procedure_state
    LIMIT 8;

    -- value buckets: 5 ranges plus 'fără valoare' for NULL or zero
    -- awarded_value. sort_order is fixed per label; the labels in the two
    -- CASE expressions must stay in sync.
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'value', bucket, bucket, count(*),
           CASE bucket
               WHEN 'sub 100K'     THEN 1
               WHEN '100K – 1M'    THEN 2
               WHEN '1M – 10M'     THEN 3
               WHEN '10M – 100M'   THEN 4
               WHEN 'peste 100M'   THEN 5
               WHEN 'fără valoare' THEN 6
           END
    FROM (
        SELECT CASE
                   WHEN awarded_value IS NULL OR awarded_value = 0 THEN 'fără valoare'
                   WHEN awarded_value < 100000     THEN 'sub 100K'
                   WHEN awarded_value < 1000000    THEN '100K – 1M'
                   WHEN awarded_value < 10000000   THEN '1M – 10M'
                   WHEN awarded_value < 100000000  THEN '10M – 100M'
                   ELSE 'peste 100M'
               END AS bucket
        FROM seap.announcements
    ) b
    GROUP BY bucket;
END;
$$;
|
||||
|
||||
-- Close the transaction that created the snapshot tables and the function.
COMMIT;

-- Initial population: fill both snapshot tables once, right after install.
SELECT public_kpi.refresh_cauta_defaults();
|
||||
@@ -0,0 +1,18 @@
|
||||
-- 047: matching companion to sql/045 — awarded_value DESC NULLS LAST index.
|
||||
--
|
||||
-- Problem: /achizitii/cauta?sort=value_desc was ~3.5s. Same root cause as
|
||||
-- sql/045: existing `idx_ann_value btree (awarded_value)` defaults to
|
||||
-- ASC NULLS LAST, so ORDER BY awarded_value DESC NULLS LAST cannot use it
|
||||
-- (NULLS ordering mismatch). Planner did Seq Scan + top-N heap sort over
|
||||
-- 781K rows.
|
||||
--
|
||||
-- Fix: composite-direction index matching the exact ORDER BY clause.
|
||||
--
|
||||
-- Measurements (production):
|
||||
-- Before: 1284ms (Seq Scan + Sort)
|
||||
-- After: 0.4ms (Index Scan + LIMIT)
|
||||
--
|
||||
-- Existing idx_ann_value is kept for the value_asc sort case.
|
||||
|
||||
-- B-tree whose key order matches ORDER BY awarded_value DESC NULLS LAST.
-- CONCURRENTLY cannot run inside a transaction block, so this statement must
-- stay outside any BEGIN/COMMIT.
-- NOTE: a failed concurrent build leaves an INVALID index behind, which
-- IF NOT EXISTS then skips; drop it before re-running.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_ann_value_desc_nl
    ON seap.announcements
    USING btree (awarded_value DESC NULLS LAST);
|
||||
Reference in New Issue
Block a user