initial: split from gov-agreg — vreau.digital standalone platform

Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
Claude VM
2026-05-13 00:10:32 +03:00
commit a6c03a091e
352 changed files with 75295 additions and 0 deletions
+189
View File
@@ -0,0 +1,189 @@
-- SEAP Data Schema for Harta Banilor Publici
-- Runs inside architools_db, isolated in schema "seap"
-- ZERO modifications to existing public.* tables
BEGIN;
-- Extensions used by this schema: unaccent() is called by
-- seap.normalize_locality; pg_trgm supplies trigram operators for fuzzy matching.
CREATE EXTENSION IF NOT EXISTS unaccent;
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Namespace that holds every object created by this migration.
CREATE SCHEMA IF NOT EXISTS seap;
-- ── SEAP entities (contracting authorities + suppliers) ──
-- entity_type restricts each row to one of the two roles.
CREATE TABLE seap.entities (
    entity_id     INTEGER     PRIMARY KEY,
    entity_type   TEXT        NOT NULL CHECK (entity_type IN ('authority', 'supplier')),
    fiscal_number TEXT,
    name          TEXT        NOT NULL,
    city          TEXT,
    county        TEXT,
    address       TEXT,
    postal_code   TEXT,
    is_utility    BOOLEAN,
    siruta        TEXT,        -- must exist in public."GisUat" (foreign key below)
    match_score   REAL,        -- NOTE(review): presumably the locality-match quality — confirm with the matcher
    fetched_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    FOREIGN KEY (siruta) REFERENCES public."GisUat"(siruta)
);

-- Lookup indexes: one per commonly filtered column.
CREATE INDEX idx_entities_fiscal ON seap.entities (fiscal_number);
CREATE INDEX idx_entities_siruta ON seap.entities (siruta);
CREATE INDEX idx_entities_type   ON seap.entities (entity_type);
CREATE INDEX idx_entities_county ON seap.entities (county);
-- ── Direct acquisitions ──
-- Both parties of an acquisition point at seap.entities.
CREATE TABLE seap.direct_acquisitions (
    id                 INTEGER       PRIMARY KEY,
    unique_code        TEXT          UNIQUE,
    name               TEXT,
    cpv_code           TEXT,
    cpv_name           TEXT,
    publication_date   TIMESTAMPTZ,
    finalization_date  TIMESTAMPTZ,
    estimated_value    NUMERIC(15,2),
    closing_value      NUMERIC(15,2),
    currency           TEXT          DEFAULT 'RON',
    state_id           INTEGER,
    state_text         TEXT,
    contract_type_id   INTEGER,
    contract_type_text TEXT,
    eu_fund_id         INTEGER,
    eu_fund_text       TEXT,
    authority_id       INTEGER,
    supplier_id        INTEGER,
    fetched_at         TIMESTAMPTZ   NOT NULL DEFAULT now(),
    FOREIGN KEY (authority_id) REFERENCES seap.entities(entity_id),
    FOREIGN KEY (supplier_id)  REFERENCES seap.entities(entity_id)
);

-- Party, date, CPV and value lookups.
CREATE INDEX idx_da_authority    ON seap.direct_acquisitions (authority_id);
CREATE INDEX idx_da_supplier     ON seap.direct_acquisitions (supplier_id);
CREATE INDEX idx_da_finalization ON seap.direct_acquisitions (finalization_date);
CREATE INDEX idx_da_publication  ON seap.direct_acquisitions (publication_date);
CREATE INDEX idx_da_cpv          ON seap.direct_acquisitions (cpv_code);
CREATE INDEX idx_da_value        ON seap.direct_acquisitions (closing_value);
-- ── Public tenders (contract award notices) ──
-- The authority's city/county/SIRUTA are stored on the notice itself,
-- next to the authority_id reference.
CREATE TABLE seap.public_notices (
    id                  INTEGER       PRIMARY KEY,
    notice_no           TEXT,
    contract_title      TEXT,
    cpv_code            TEXT,
    cpv_name            TEXT,
    estimated_value     NUMERIC(15,2),
    contract_value      NUMERIC(15,2),
    currency            TEXT          DEFAULT 'RON',
    publication_date    TIMESTAMPTZ,
    state_date          TIMESTAMPTZ,
    procedure_type_id   INTEGER,
    procedure_type_text TEXT,
    contract_type_id    INTEGER,
    contract_type_text  TEXT,
    notice_type_id      INTEGER,
    state_id            INTEGER,
    state_text          TEXT,
    authority_id        INTEGER,
    authority_city      TEXT,
    authority_county    TEXT,
    authority_siruta    TEXT,
    has_lots            BOOLEAN       DEFAULT false,
    fetched_at          TIMESTAMPTZ   NOT NULL DEFAULT now(),
    FOREIGN KEY (authority_id)     REFERENCES seap.entities(entity_id),
    FOREIGN KEY (authority_siruta) REFERENCES public."GisUat"(siruta)
);

CREATE INDEX idx_pn_authority ON seap.public_notices (authority_id);
CREATE INDEX idx_pn_date      ON seap.public_notices (publication_date);
CREATE INDEX idx_pn_siruta    ON seap.public_notices (authority_siruta);
CREATE INDEX idx_pn_cpv       ON seap.public_notices (cpv_code);
-- ── Awarded contracts (from section 5 of the tender notices) ──
-- Each row references its parent notice and, optionally, the winning entity.
CREATE TABLE seap.notice_contracts (
    id             SERIAL        PRIMARY KEY,
    notice_id      INTEGER,
    lot_number     INTEGER,
    lot_title      TEXT,
    contract_value NUMERIC(15,2),
    currency       TEXT          DEFAULT 'RON',
    contract_date  DATE,
    winner_id      INTEGER,
    winner_name    TEXT,
    winner_fiscal  TEXT,
    winner_city    TEXT,
    winner_county  TEXT,
    winner_siruta  TEXT,
    num_offers     INTEGER,
    FOREIGN KEY (notice_id)     REFERENCES seap.public_notices(id),
    FOREIGN KEY (winner_id)     REFERENCES seap.entities(entity_id),
    FOREIGN KEY (winner_siruta) REFERENCES public."GisUat"(siruta)
);

CREATE INDEX idx_nc_notice        ON seap.notice_contracts (notice_id);
CREATE INDEX idx_nc_winner        ON seap.notice_contracts (winner_id);
CREATE INDEX idx_nc_winner_siruta ON seap.notice_contracts (winner_siruta);
-- ── SEAP locality → SIRUTA matching ──
-- Keyed by the (city, county) pair as it appears in SEAP.
CREATE TABLE seap.locality_map (
    seap_city   TEXT NOT NULL,
    seap_county TEXT NOT NULL,
    siruta      TEXT,
    match_type  TEXT,
    confidence  REAL,
    PRIMARY KEY (seap_city, seap_county),
    FOREIGN KEY (siruta) REFERENCES public."GisUat"(siruta)
);
-- ── Scraper sync state ──
-- One row per source, keyed by the source name.
CREATE TABLE seap.sync_state (
    source     TEXT        PRIMARY KEY,
    last_date  TIMESTAMPTZ,
    last_id    INTEGER,
    status     TEXT,
    updated_at TIMESTAMPTZ DEFAULT now()
);

-- Register the two sources, both starting as 'pending'.
INSERT INTO seap.sync_state (source, status)
SELECT seed.source, 'pending'
FROM (VALUES ('da'), ('notices')) AS seed(source);
-- ── Helper: normalize locality names ──
-- Pipeline, innermost first: collapse every whitespace run to a single space,
-- strip diacritics with unaccent(), trim both ends, lowercase.
-- Returns NULL for NULL input (every step propagates NULL).
CREATE OR REPLACE FUNCTION seap.normalize_locality(input TEXT)
RETURNS TEXT
LANGUAGE sql
IMMUTABLE
AS $$
    SELECT lower(btrim(unaccent(regexp_replace(input, '\s+', ' ', 'g'))));
$$;
-- ── Materialized view: procurement stats per UAT ──
-- One row per public."GisUat" entry. Direct acquisitions are attributed through
-- the authority entity's siruta; notices through their own authority_siruta.
-- UATs without any activity get zeros via COALESCE.
CREATE MATERIALIZED VIEW seap.uat_procurement_stats AS
WITH da_by_uat AS (
    -- direct acquisitions rolled up per authority SIRUTA
    SELECT
        ent.siruta,
        COUNT(*)               AS da_count,
        SUM(acq.closing_value) AS da_total_value
    FROM seap.direct_acquisitions AS acq
    JOIN seap.entities AS ent
        ON ent.entity_id = acq.authority_id
    GROUP BY ent.siruta
),
notices_by_uat AS (
    -- public notices rolled up per authority SIRUTA
    SELECT
        ntc.authority_siruta    AS siruta,
        COUNT(*)                AS notice_count,
        SUM(ntc.contract_value) AS notice_total_value
    FROM seap.public_notices AS ntc
    GROUP BY ntc.authority_siruta
)
SELECT
    uat.siruta,
    uat.name                                 AS uat_name,
    uat.county,
    COALESCE(das.da_count, 0)                AS da_count,
    COALESCE(das.da_total_value, 0)          AS da_total_value,
    COALESCE(nts.notice_count, 0)            AS notice_count,
    COALESCE(nts.notice_total_value, 0)      AS notice_total_value,
    COALESCE(das.da_count, 0) + COALESCE(nts.notice_count, 0)             AS total_contracts,
    COALESCE(das.da_total_value, 0) + COALESCE(nts.notice_total_value, 0) AS total_value
FROM public."GisUat" AS uat
LEFT JOIN da_by_uat AS das
    ON das.siruta = uat.siruta
LEFT JOIN notices_by_uat AS nts
    ON nts.siruta = uat.siruta;

CREATE UNIQUE INDEX idx_ups_siruta ON seap.uat_procurement_stats (siruta);
COMMIT;
@@ -0,0 +1,52 @@
-- Unified announcements table for all SEAP data types
BEGIN;
-- Flat, denormalized record per announcement; (type, ref_number) is the natural key.
CREATE TABLE IF NOT EXISTS seap.announcements (
    id                BIGSERIAL     PRIMARY KEY,
    type              TEXT          NOT NULL,
    ref_number        TEXT          NOT NULL,

    -- contracting authority
    authority_name    TEXT,
    authority_cui     TEXT,
    authority_siruta  TEXT,

    -- subject of the procurement
    title             TEXT,
    cpv_code          TEXT,
    cpv_name          TEXT,
    contract_type     TEXT,

    -- dates and amounts
    publication_date  TIMESTAMPTZ,
    finalization_date TIMESTAMPTZ,
    contract_date     DATE,
    estimated_value   NUMERIC(15,2),
    awarded_value     NUMERIC(15,2),
    currency          TEXT          DEFAULT 'RON',

    -- supplier
    supplier_name     TEXT,
    supplier_cui      TEXT,
    supplier_siruta   TEXT,

    -- procedure metadata
    procedure_type    TEXT,
    procedure_state   TEXT,
    award_type        TEXT,
    legislation       TEXT,
    criterion         TEXT,
    eu_funded         TEXT,
    eu_program        TEXT,
    lot_number        INTEGER,
    has_lots          TEXT,
    joue              TEXT,

    -- contract modifications
    value_before      NUMERIC(15,2),
    value_after       NUMERIC(15,2),
    modification_desc TEXT,

    -- provenance
    seap_url          TEXT,
    source            TEXT          DEFAULT 'datagov',
    imported_at       TIMESTAMPTZ   DEFAULT now(),

    UNIQUE (type, ref_number)
);

CREATE INDEX IF NOT EXISTS idx_ann_type          ON seap.announcements (type);
CREATE INDEX IF NOT EXISTS idx_ann_auth_cui      ON seap.announcements (authority_cui);
CREATE INDEX IF NOT EXISTS idx_ann_auth_siruta   ON seap.announcements (authority_siruta);
CREATE INDEX IF NOT EXISTS idx_ann_sup_cui       ON seap.announcements (supplier_cui);
CREATE INDEX IF NOT EXISTS idx_ann_pub_date      ON seap.announcements (publication_date);
CREATE INDEX IF NOT EXISTS idx_ann_value         ON seap.announcements (awarded_value);
CREATE INDEX IF NOT EXISTS idx_ann_cpv           ON seap.announcements (cpv_code);
CREATE INDEX IF NOT EXISTS idx_ann_contract_type ON seap.announcements (contract_type);
COMMIT;
@@ -0,0 +1,98 @@
-- Platform tables for submissions + voting
BEGIN;
CREATE SCHEMA IF NOT EXISTS platform;

-- Ideas/submissions — open to anyone; all author fields are optional.
CREATE TABLE platform.ideas (
    id           BIGSERIAL   PRIMARY KEY,
    title        TEXT        NOT NULL,
    problem      TEXT        NOT NULL,           -- answer to the "what bothers you?" prompt
    solution     TEXT,                           -- answer to the "how should it be?" prompt
    category     TEXT        DEFAULT 'general',  -- e.g. transparenta, cereri, ai, educatie, sanatate
    author_name  TEXT,                           -- optional
    author_email TEXT,                           -- optional, for follow-up
    author_city  TEXT,                           -- optional
    status       TEXT        DEFAULT 'nou',      -- one of: nou, în discuție, în lucru, mvp, live, respins
    votes        INTEGER     DEFAULT 0,
    created_at   TIMESTAMPTZ DEFAULT now(),
    updated_at   TIMESTAMPTZ DEFAULT now()
);

-- Listing indexes: by popularity, by workflow status, newest first, by category.
CREATE INDEX idx_ideas_votes    ON platform.ideas (votes DESC);
CREATE INDEX idx_ideas_status   ON platform.ideas (status);
CREATE INDEX idx_ideas_created  ON platform.ideas (created_at DESC);
CREATE INDEX idx_ideas_category ON platform.ideas (category);
-- Votes — fingerprint-based (no accounts).
-- At most one vote per (idea, fingerprint); votes are deleted with their idea.
CREATE TABLE platform.votes (
    id          BIGSERIAL   PRIMARY KEY,
    idea_id     BIGINT,
    fingerprint TEXT        NOT NULL,  -- hash of IP + user-agent
    created_at  TIMESTAMPTZ DEFAULT now(),
    FOREIGN KEY (idea_id) REFERENCES platform.ideas(id) ON DELETE CASCADE,
    UNIQUE (idea_id, fingerprint)
);
-- Comments on ideas — simple, no accounts; removed together with their idea.
CREATE TABLE platform.comments (
    id          BIGSERIAL   PRIMARY KEY,
    idea_id     BIGINT,
    author_name TEXT        DEFAULT 'Anonim',
    content     TEXT        NOT NULL,
    created_at  TIMESTAMPTZ DEFAULT now(),
    FOREIGN KEY (idea_id) REFERENCES platform.ideas(id) ON DELETE CASCADE
);

-- Serves "comments of one idea in chronological order".
CREATE INDEX idx_comments_idea ON platform.comments (idea_id, created_at);
-- Seed some initial ideas to get things started.
-- The vote counts are seeded directly on the idea rows; no matching
-- platform.votes rows are inserted here.
-- Fix: the "Profil digital per primărie" problem text read "Nu existe"
-- (not a valid Romanian verb form); corrected to "Nu există".
INSERT INTO platform.ideas (title, problem, solution, category, author_name, status, votes) VALUES
(
    'Verificare status dosar la orice instituție',
    'Trebuie să mergi fizic sau să suni repetat ca să afli ce se întâmplă cu dosarul tău. Fiecare instituție are alt sistem, unele nu au deloc.',
    'O platformă unificată unde introduci numărul de dosar și vezi statusul instant, indiferent de instituție.',
    'cereri', 'Comunitate', 'nou', 42
),
(
    'Extras Carte Funciară online, instant',
    'Durează 3-5 zile și necesită deplasare la OCPI. În 2026, un document public ar trebui disponibil online.',
    'Introduci număr cadastral → primești PDF cu extrasul CF. Fără deplasare, fără așteptare.',
    'cereri', 'Comunitate', 'nou', 38
),
(
    'Certificat fiscal în 30 de secunde',
    'Stai la coadă la primărie, plătești timbru, aștepți 1-3 zile. De 3 ori pe an minim, dacă ai firmă.',
    'CNP sau CUI → certificat fiscal digital, semnat electronic, valid legal.',
    'cereri', 'Comunitate', 'nou', 35
),
(
    'Programare buletin/pașaport care chiar funcționează',
    'Sistemul MAI e permanent supraîncărcat, cade, nu găsești slot-uri. Ajungi la 4 dimineața la coadă.',
    'Calendar cu disponibilitate reală, notificare când se eliberează slot, programare în 3 click-uri.',
    'cereri', 'Comunitate', 'nou', 50
),
(
    'Calculator taxe și impozite locale',
    'Nu știi cât datorezi, trebuie să mergi la primărie să afli. Fiecare primărie calculează diferit.',
    'Introdu adresa sau nr. cadastral → vezi toate taxele datorate, cu deadline-uri și posibilitate de plată.',
    'transparenta', 'Comunitate', 'nou', 30
),
(
    'Monitor licitații publice cu alerte',
    'Informația e dispersată, greu de urmărit. Firmele mici pierd oportunități pentru că nu știu de ele.',
    'Feed cu licitații filtrat pe domeniu/județ/valoare. Alerte pe email când apare ceva relevant.',
    'transparenta', 'Comunitate', 'în lucru', 25
),
(
    'Profil digital per primărie',
    'Nu există un loc centralizat unde să vezi cum performează primăria ta: buget, licitații, servicii digitale.',
    'Pagina per primărie cu: buget, top cheltuieli, licitații, nivel digitalizare, comparație cu altele.',
    'transparenta', 'Comunitate', 'în lucru', 22
),
(
    'Generator cereri și petiții cu AI',
    'Oamenii nu știu cum să formuleze o cerere oficială. Limbajul birocratic intimidează.',
    'Descrii în cuvintele tale ce vrei → AI generează cererea completă, cu referințe legale corecte.',
    'ai', 'Comunitate', 'nou', 28
);
COMMIT;
+174
View File
@@ -0,0 +1,174 @@
-- WSP integration: schema extensions for SEAP web service ingestion.
-- Idempotent: safe to re-run on existing DB (already has ~600K rows in seap.announcements).
BEGIN;
-- ── Extend seap.announcements for WSP-specific structured + raw data ──
-- One ALTER TABLE with one ADD COLUMN clause per field; IF NOT EXISTS on each
-- clause keeps the statement safe to re-run.
ALTER TABLE seap.announcements
    -- notice lifecycle
    ADD COLUMN IF NOT EXISTS county_code             TEXT,
    ADD COLUMN IF NOT EXISTS notice_state            TEXT,
    ADD COLUMN IF NOT EXISTS notice_state_id         INT,
    ADD COLUMN IF NOT EXISTS deadline_submission     TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS opening_date            TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS duration_months         INT,
    ADD COLUMN IF NOT EXISTS duration_days           INT,
    -- authority contact / classification
    ADD COLUMN IF NOT EXISTS authority_address       TEXT,
    ADD COLUMN IF NOT EXISTS authority_email         TEXT,
    ADD COLUMN IF NOT EXISTS authority_phone         TEXT,
    ADD COLUMN IF NOT EXISTS authority_url           TEXT,
    ADD COLUMN IF NOT EXISTS authority_type          TEXT,
    ADD COLUMN IF NOT EXISTS authority_main_activity TEXT,
    -- supplier
    ADD COLUMN IF NOT EXISTS supplier_address        TEXT,
    ADD COLUMN IF NOT EXISTS supplier_is_sme         BOOLEAN,
    -- contract structure
    ADD COLUMN IF NOT EXISTS framework_agreement     BOOLEAN,
    ADD COLUMN IF NOT EXISTS lots_count              INT,
    ADD COLUMN IF NOT EXISTS contract_has_lots       BOOLEAN,
    -- structured / raw payloads
    ADD COLUMN IF NOT EXISTS award_criteria          JSONB,
    ADD COLUMN IF NOT EXISTS lots                    JSONB,
    ADD COLUMN IF NOT EXISTS documents               JSONB,
    ADD COLUMN IF NOT EXISTS details                 JSONB,        -- raw Section1-6 nested
    -- identifiers and bookkeeping
    ADD COLUMN IF NOT EXISTS notice_id_internal      BIGINT,       -- WSP CNoticeId / CaNoticeId
    ADD COLUMN IF NOT EXISTS authority_entity_id     INT,
    ADD COLUMN IF NOT EXISTS supplier_entity_id      INT,
    ADD COLUMN IF NOT EXISTS enriched_at             TIMESTAMPTZ;
-- pg_trgm provides the gin_trgm_ops operator class used by the two trigram
-- indexes below, so it has to exist BEFORE they are created (idempotent).
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Plain b-tree lookups on the new WSP columns.
CREATE INDEX IF NOT EXISTS idx_ann_county ON seap.announcements(county_code);
CREATE INDEX IF NOT EXISTS idx_ann_state ON seap.announcements(notice_state);
-- Partial: only rows that actually carry a submission deadline.
CREATE INDEX IF NOT EXISTS idx_ann_deadline ON seap.announcements(deadline_submission) WHERE deadline_submission IS NOT NULL;

-- Trigram GIN indexes for fuzzy authority/supplier name search.
CREATE INDEX IF NOT EXISTS idx_ann_authority_name_trgm ON seap.announcements USING gin(authority_name gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_ann_supplier_name_trgm ON seap.announcements USING gin(supplier_name gin_trgm_ops);
-- ── Sync state: cursor per WSP feed ──
CREATE TABLE IF NOT EXISTS seap.wsp_sync_state (
    feed                 TEXT,          -- e.g. 'ca_notices', 'c_notices', 'su_contracts'
    last_run_at          TIMESTAMPTZ,
    last_cursor_date     TIMESTAMPTZ,   -- highest publication_date successfully ingested
    last_window_start    TIMESTAMPTZ,
    last_window_end      TIMESTAMPTZ,
    items_imported_total BIGINT         DEFAULT 0,
    items_imported_24h   INT            DEFAULT 0,
    consecutive_errors   INT            DEFAULT 0,
    last_error           TEXT,
    last_error_at        TIMESTAMPTZ,
    notes                TEXT,
    PRIMARY KEY (feed)
);
-- ── Backfill window queue: each window is a checkpoint ──
-- NOTE(review): county_code is nullable and NULLs are distinct in a UNIQUE
-- constraint, so two windows with the same feed/start/end and no county are
-- not rejected as duplicates — confirm whether that is intended.
CREATE TABLE IF NOT EXISTS seap.wsp_backfill_windows (
    id             BIGSERIAL    PRIMARY KEY,
    feed           TEXT         NOT NULL,
    window_start   TIMESTAMPTZ  NOT NULL,
    window_end     TIMESTAMPTZ  NOT NULL,
    county_code    TEXT,                              -- optional partition
    state          TEXT         NOT NULL DEFAULT 'pending',  -- pending, in_progress, completed, failed, skipped
    items_imported INT          DEFAULT 0,
    page_total     INT,
    attempts       INT          DEFAULT 0,
    last_error     TEXT,
    started_at     TIMESTAMPTZ,
    completed_at   TIMESTAMPTZ,
    created_at     TIMESTAMPTZ  DEFAULT now(),
    UNIQUE (feed, window_start, window_end, county_code)
);

-- Queue scans: all states per feed, plus a slimmer partial index for pending work.
CREATE INDEX IF NOT EXISTS idx_wsp_bf_state
    ON seap.wsp_backfill_windows (feed, state, window_start);
CREATE INDEX IF NOT EXISTS idx_wsp_bf_pending
    ON seap.wsp_backfill_windows (feed, window_start)
    WHERE state = 'pending';
-- ── Beletage-scoped tables (Su* operations) ──
-- Contracts, keyed by the WSP contract id.
CREATE TABLE IF NOT EXISTS seap.beletage_contracts (
    contract_id            BIGINT,          -- WSP ContractId
    contract_no            TEXT,
    contract_title         TEXT,
    contract_type          TEXT,
    contract_phase         TEXT,
    contract_state         TEXT,
    awarding_date          DATE,
    contract_date          DATE,
    publication_date       TIMESTAMPTZ,
    duration_months        INT,
    contract_value         NUMERIC(15,2),
    default_currency_value NUMERIC(15,2),
    currency               TEXT,
    ca_notice_id           BIGINT,          -- link to public CA notice
    ca_notice_no           TEXT,
    authority_name         TEXT,
    authority_cui          TEXT,
    is_current_version     BOOLEAN,
    is_rejected            BOOLEAN,
    version_no             INT,
    version_date           TIMESTAMPTZ,
    justification          TEXT,
    additional_information TEXT,
    details                JSONB,           -- raw CANotice + ContractPhases + ContractSections
    imported_at            TIMESTAMPTZ      DEFAULT now(),
    enriched_at            TIMESTAMPTZ,
    PRIMARY KEY (contract_id)
);

CREATE INDEX IF NOT EXISTS idx_beletage_contracts_date
    ON seap.beletage_contracts (awarding_date DESC);
CREATE INDEX IF NOT EXISTS idx_beletage_contracts_authority
    ON seap.beletage_contracts (authority_cui);
-- Invoices, keyed by the WSP invoice id. contract_id is a soft link only:
-- no foreign key is declared towards seap.beletage_contracts.
CREATE TABLE IF NOT EXISTS seap.beletage_invoices (
    invoice_id         BIGINT,          -- WSP InvoiceId
    invoice_no         TEXT,
    invoice_date       DATE,
    due_date           DATE,
    contract_id        BIGINT,          -- FK soft to beletage_contracts
    contract_no        TEXT,
    authority_name     TEXT,
    authority_cui      TEXT,
    total_value        NUMERIC(15,2),
    total_value_no_vat NUMERIC(15,2),
    vat_value          NUMERIC(15,2),
    currency           TEXT,
    state              TEXT,
    paid_value         NUMERIC(15,2),
    paid_at            TIMESTAMPTZ,
    details            JSONB,           -- raw InvoiceItem + payments + details
    imported_at        TIMESTAMPTZ      DEFAULT now(),
    PRIMARY KEY (invoice_id)
);

CREATE INDEX IF NOT EXISTS idx_beletage_invoices_date
    ON seap.beletage_invoices (invoice_date DESC);
CREATE INDEX IF NOT EXISTS idx_beletage_invoices_contract
    ON seap.beletage_invoices (contract_id);
-- Direct acquisitions, keyed by the WSP direct-acquisition id.
CREATE TABLE IF NOT EXISTS seap.beletage_direct_acquisitions (
    da_id                      BIGINT,          -- WSP DirectAcquisitionId
    da_name                    TEXT,
    unique_identification_code TEXT,
    cpv_code                   TEXT,
    cpv_name                   TEXT,
    contract_type              TEXT,
    publication_date           TIMESTAMPTZ,
    finalization_date          TIMESTAMPTZ,
    estimated_value            NUMERIC(15,2),
    closing_value              NUMERIC(15,2),
    currency                   TEXT,
    da_state                   TEXT,
    authority_id               INT,
    authority_name             TEXT,
    authority_cui              TEXT,
    details                    JSONB,
    imported_at                TIMESTAMPTZ      DEFAULT now(),
    PRIMARY KEY (da_id)
);

CREATE INDEX IF NOT EXISTS idx_beletage_da_date
    ON seap.beletage_direct_acquisitions (finalization_date DESC);
-- ── Beletage catalog (if used) ──
-- Catalog items keyed by item code.
CREATE TABLE IF NOT EXISTS seap.beletage_catalog (
    item_code    TEXT,
    item_name    TEXT,
    cpv_code     TEXT,
    unit_price   NUMERIC(15,2),
    currency     TEXT,
    last_updated TIMESTAMPTZ,
    details      JSONB,
    imported_at  TIMESTAMPTZ   DEFAULT now(),
    PRIMARY KEY (item_code)
);
-- ── Materialized views for hub UI (refresh nightly) ──
-- Will be added in 005 once bulk data is in; placeholder comment here for traceability.
COMMIT;
+121
View File
@@ -0,0 +1,121 @@
-- Materialized views for hub UI — refreshed nightly after WSP sync.
-- Provides fast aggregations for "Achiziții România live" dashboards.
BEGIN;
-- ── Daily totals: count + value per day (across all WSP sources) ──
-- One row per (day, type) over the last 24 months of WSP-sourced rows.
-- sum() already skips NULL inputs, so no FILTER is needed on the value sums.
-- NOTE(review): in LIKE 'wsp_%' the underscore is itself a single-character
-- wildcard, so this matches any source starting with 'wsp' plus at least one
-- character — confirm that is acceptable.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_daily_totals AS
SELECT
    date_trunc('day', publication_date)::date AS day,
    type,
    count(*)                      AS notices,
    sum(awarded_value)            AS total_awarded,
    sum(estimated_value)          AS total_estimated,
    count(DISTINCT authority_cui) AS distinct_authorities,
    count(DISTINCT supplier_cui)  AS distinct_suppliers
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND publication_date >= now() - interval '24 months'
GROUP BY date_trunc('day', publication_date)::date, type;

CREATE INDEX IF NOT EXISTS idx_mv_daily_totals_day ON seap.mv_daily_totals(day DESC);
-- REFRESH MATERIALIZED VIEW CONCURRENTLY requires a unique index on plain
-- columns; (day, type) is exactly the GROUP BY key, so it is unique by construction.
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_daily_totals_day_type ON seap.mv_daily_totals(day, type);
-- ── Top contracting authorities (last 12 months by total awarded value) ──
-- One row per (authority_cui, authority_name, county_code) over WSP-sourced rows.
-- The former HAVING count(*) >= 1 was always true for a grouped row and is dropped;
-- sum()/avg() skip NULL inputs on their own.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_top_authorities AS
SELECT
    authority_cui,
    authority_name,
    county_code,
    count(*)                                   AS notices_count,
    count(*) FILTER (WHERE type = 'ca_notice') AS awarded_count,
    sum(awarded_value)                         AS total_awarded,
    avg(awarded_value)                         AS avg_awarded,
    array_agg(DISTINCT cpv_code) FILTER (WHERE cpv_code IS NOT NULL) AS cpv_codes,
    max(publication_date)                      AS most_recent
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND authority_cui IS NOT NULL
  AND publication_date >= now() - interval '12 months'
GROUP BY authority_cui, authority_name, county_code;

CREATE INDEX IF NOT EXISTS idx_mv_top_auth_value ON seap.mv_top_authorities(total_awarded DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_top_auth_cui ON seap.mv_top_authorities(authority_cui);
CREATE INDEX IF NOT EXISTS idx_mv_top_auth_county ON seap.mv_top_authorities(county_code);
-- REFRESH MATERIALIZED VIEW CONCURRENTLY requires a unique index on plain
-- columns; the GROUP BY key is unique by construction (NULL name/county values
-- count as distinct, so they never violate it).
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_top_auth_key
    ON seap.mv_top_authorities(authority_cui, authority_name, county_code);
-- ── Top suppliers (firms that won contracts) ──
-- One row per (supplier_cui, supplier_name) over WSP-sourced 'ca_notice' rows
-- of the last 12 months. sum()/avg() skip NULL inputs on their own.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_top_suppliers AS
SELECT
    supplier_cui,
    supplier_name,
    count(*)                      AS contracts_won,
    sum(awarded_value)            AS total_awarded,
    avg(awarded_value)            AS avg_awarded,
    count(DISTINCT authority_cui) AS distinct_clients,
    array_agg(DISTINCT cpv_code) FILTER (WHERE cpv_code IS NOT NULL) AS cpv_codes,
    max(publication_date)         AS most_recent
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND supplier_cui IS NOT NULL
  AND type = 'ca_notice'
  AND publication_date >= now() - interval '12 months'
GROUP BY supplier_cui, supplier_name;

CREATE INDEX IF NOT EXISTS idx_mv_top_supp_value ON seap.mv_top_suppliers(total_awarded DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_top_supp_cui ON seap.mv_top_suppliers(supplier_cui);
-- REFRESH MATERIALIZED VIEW CONCURRENTLY requires a unique index on plain
-- columns; the GROUP BY key is unique by construction (NULL names count as distinct).
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_top_supp_key
    ON seap.mv_top_suppliers(supplier_cui, supplier_name);
-- ── Top CPV codes (most-used categories) ──
-- One row per cpv_code over WSP-sourced rows of the last 12 months.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_top_cpv AS
SELECT
    cpv_code,
    count(*)                      AS notices_count,
    sum(awarded_value)            AS total_awarded,
    count(DISTINCT authority_cui) AS distinct_buyers,
    count(DISTINCT supplier_cui)  AS distinct_winners
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND cpv_code IS NOT NULL
  AND publication_date >= now() - interval '12 months'
GROUP BY cpv_code;

CREATE INDEX IF NOT EXISTS idx_mv_top_cpv_value ON seap.mv_top_cpv(total_awarded DESC NULLS LAST);
-- REFRESH MATERIALIZED VIEW CONCURRENTLY requires a unique index on plain
-- columns; cpv_code is the GROUP BY key and is filtered to be non-NULL.
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_top_cpv_code ON seap.mv_top_cpv(cpv_code);
-- ── County totals (for map) ──
-- One row per (county_code, type) over WSP-sourced rows of the last 12 months.
CREATE MATERIALIZED VIEW IF NOT EXISTS seap.mv_county_totals AS
SELECT
    county_code,
    type,
    count(*)           AS notices_count,
    sum(awarded_value) AS total_awarded
FROM seap.announcements
WHERE source LIKE 'wsp_%'
  AND county_code IS NOT NULL
  AND publication_date >= now() - interval '12 months'
GROUP BY county_code, type;

CREATE INDEX IF NOT EXISTS idx_mv_county_totals_code ON seap.mv_county_totals(county_code);
-- REFRESH MATERIALIZED VIEW CONCURRENTLY requires a unique index on plain
-- columns; (county_code, type) is the GROUP BY key and both parts are non-NULL.
CREATE UNIQUE INDEX IF NOT EXISTS uq_mv_county_totals_key ON seap.mv_county_totals(county_code, type);
-- ── Refresh function (called by cron after daily sync) ──
-- Tries a non-blocking CONCURRENTLY refresh of all five hub views and falls
-- back to a plain (locking) refresh when CONCURRENTLY is not possible.
--
-- PostgreSQL reports the two "cannot refresh concurrently" situations with
-- different SQLSTATEs, so both must be caught:
--   * feature_not_supported           — the view has never been populated
--   * object_not_in_prerequisite_state — the view lacks a usable unique index
-- Catching only the first one let the second abort the function without ever
-- reaching the fallback.
--
-- The BEGIN ... EXCEPTION block runs as a subtransaction: when the handler
-- fires, any concurrent refreshes already done in this call are rolled back
-- and all five views are refreshed again non-concurrently.
CREATE OR REPLACE FUNCTION seap.refresh_wsp_views()
RETURNS void AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_daily_totals;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_authorities;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_suppliers;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_cpv;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_county_totals;
EXCEPTION
    WHEN feature_not_supported OR object_not_in_prerequisite_state THEN
        -- Fallback: plain refresh takes an exclusive lock on each view but always works.
        REFRESH MATERIALIZED VIEW seap.mv_daily_totals;
        REFRESH MATERIALIZED VIEW seap.mv_top_authorities;
        REFRESH MATERIALIZED VIEW seap.mv_top_suppliers;
        REFRESH MATERIALIZED VIEW seap.mv_top_cpv;
        REFRESH MATERIALIZED VIEW seap.mv_county_totals;
END;
$$ LANGUAGE plpgsql;
COMMIT;
@@ -0,0 +1,71 @@
-- Map WSP rows to UAT SIRUTA codes + extend the harta UAT stats view.
-- (Suppliers may have "RO " prefix; authorities are clean. Strip both forms.)
BEGIN;
-- Partial indexes covering exactly the rows the two backfill UPDATEs still
-- have to touch, plus a lookup index on the CUI → SIRUTA source table.
CREATE INDEX IF NOT EXISTS idx_ann_auth_cui_wsp
    ON seap.announcements (authority_cui)
    WHERE source LIKE 'wsp_%' AND authority_siruta IS NULL AND authority_cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_ann_supp_cui_wsp
    ON seap.announcements (supplier_cui)
    WHERE source LIKE 'wsp_%' AND supplier_siruta IS NULL AND supplier_cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cui_loc_cui
    ON seap.cui_location (cui)
    WHERE siruta IS NOT NULL;

-- Authorities: the CUI is compared as stored (direct match).
UPDATE seap.announcements AS ann
SET authority_siruta = loc.siruta
FROM seap.cui_location AS loc
WHERE loc.cui = ann.authority_cui
  AND loc.siruta IS NOT NULL
  AND ann.authority_cui IS NOT NULL
  AND ann.authority_siruta IS NULL
  AND ann.source LIKE 'wsp_%';

-- Suppliers: a leading "RO" prefix (any case, optional whitespace after it)
-- is stripped and the remainder trimmed before the comparison.
UPDATE seap.announcements AS ann
SET supplier_siruta = loc.siruta
FROM seap.cui_location AS loc
WHERE loc.cui = trim(regexp_replace(ann.supplier_cui, '^RO\s*', '', 'i'))
  AND loc.siruta IS NOT NULL
  AND ann.supplier_cui IS NOT NULL
  AND ann.supplier_siruta IS NULL
  AND ann.source LIKE 'wsp_%';
-- Extend uat_procurement_stats view to include WSP types.
-- Rebuilt on top of seap.announcements, keeping the original column names.
-- CASCADE also drops anything that depends on the old view.
-- "GisUat" is schema-qualified as public."GisUat", matching every other
-- reference to it in these migrations, so the definition does not depend on
-- the session search_path.
-- Note: total_value falls back to estimated_value when awarded_value is NULL
-- and total_contracts counts every type, so the totals are not simply
-- da_* + notice_*.
DROP MATERIALIZED VIEW IF EXISTS seap.uat_procurement_stats CASCADE;
CREATE MATERIALIZED VIEW seap.uat_procurement_stats AS
SELECT
    u.siruta,
    u.name AS uat_name,
    u.county,
    COALESCE(s.da_count, 0::bigint)        AS da_count,
    COALESCE(s.da_value, 0::numeric)       AS da_total_value,
    COALESCE(s.contract_count, 0::bigint)  AS notice_count,
    COALESCE(s.contract_value, 0::numeric) AS notice_total_value,
    COALESCE(s.total_count, 0::bigint)     AS total_contracts,
    COALESCE(s.total_value, 0::numeric)    AS total_value
FROM public."GisUat" u
LEFT JOIN (
    -- per-SIRUTA roll-up of all announcements that have been mapped to a UAT
    SELECT
        authority_siruta AS siruta,
        count(*) FILTER (WHERE type = 'da') AS da_count,
        sum(awarded_value) FILTER (WHERE type = 'da') AS da_value,
        count(*) FILTER (WHERE type IN (
            'contract', 'atribuire_fara', 'ted_notice',
            'ca_notice', 'rfq_notice'
        )) AS contract_count,
        sum(awarded_value) FILTER (WHERE type IN (
            'contract', 'atribuire_fara', 'ted_notice',
            'ca_notice', 'rfq_notice'
        )) AS contract_value,
        count(*) AS total_count,
        sum(COALESCE(awarded_value, estimated_value, 0::numeric)) AS total_value
    FROM seap.announcements
    WHERE authority_siruta IS NOT NULL
    GROUP BY authority_siruta
) s ON s.siruta = u.siruta;

CREATE UNIQUE INDEX uq_uat_proc_stats ON seap.uat_procurement_stats(siruta);
CREATE INDEX idx_uat_proc_stats_value ON seap.uat_procurement_stats(total_value DESC NULLS LAST);
CREATE INDEX idx_uat_proc_stats_county ON seap.uat_procurement_stats(county);
COMMIT;
@@ -0,0 +1,71 @@
-- CPV nomenclature: 9,454 codes with Romanian names + EU emojis.
-- Loaded from samhallskod/cpv-eu (data sourced from official EU CPV 2008 XML).
BEGIN;
-- CPV nomenclature table, keyed by the 8-digit code without check digit.
CREATE TABLE IF NOT EXISTS seap.cpv_codes (
    code          TEXT,                       -- 8-digit (no check digit), e.g. '45000000'
    code_full     TEXT,                       -- 8-digit + check, e.g. '45000000-7'
    name_ro       TEXT        NOT NULL,
    name_en       TEXT,
    level         INT         NOT NULL,       -- 1=division (45), 2=group (450), 3=class (4500), ...
    division_code TEXT        NOT NULL,       -- first 2 digits + 6 zeroes, e.g. '45000000' (top-level parent)
    parent_code   TEXT,                       -- one level up
    emoji         TEXT,                       -- only set on division level
    imported_at   TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (code)
);

-- Hierarchy navigation plus trigram search on the Romanian name.
CREATE INDEX IF NOT EXISTS idx_cpv_division  ON seap.cpv_codes (division_code);
CREATE INDEX IF NOT EXISTS idx_cpv_parent    ON seap.cpv_codes (parent_code);
CREATE INDEX IF NOT EXISTS idx_cpv_level     ON seap.cpv_codes (level);
CREATE INDEX IF NOT EXISTS idx_cpv_name_trgm ON seap.cpv_codes USING gin (name_ro gin_trgm_ops);
-- Helper: bring a CPV code to its bare form.
--   '45123456-7' → '45123456'   (trailing "-<digit>" check suffix removed)
--   ' 45123456 ' → '45123456'   (surrounding whitespace trimmed)
--   ''  or NULL  → NULL
CREATE OR REPLACE FUNCTION seap.cpv_normalize(code TEXT)
RETURNS TEXT AS $$
BEGIN
    -- NULLIF maps the empty string to NULL, which then flows through
    -- btrim/regexp_replace unchanged; STRICT already short-circuits NULL input.
    RETURN regexp_replace(btrim(NULLIF(code, '')), '-[0-9]$', '');
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
-- Helper: get division code (first 2 characters of the normalized code + 6 zeros).
-- Returns NULL for NULL input (STRICT) and whenever the normalized code is
-- shorter than 2 characters.
-- The length guard is applied to the NORMALIZED value: checking the raw input
-- let values such as '  ' or '-7' through, which normalize to '' and produced
-- a malformed '000000'.
CREATE OR REPLACE FUNCTION seap.cpv_division(code TEXT)
RETURNS TEXT AS $$
DECLARE
    normalized TEXT := seap.cpv_normalize(code);
BEGIN
    IF normalized IS NULL OR length(normalized) < 2 THEN
        RETURN NULL;
    END IF;
    RETURN substr(normalized, 1, 2) || '000000';
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
-- Get name_ro for a code, fallback to division name, fallback to code itself.
-- Lookup order:
--   1. exact match on the normalized code,
--   2. the code's division row,
--   3. the input as given (also the result for NULL input).
--
-- The table column is referenced through the alias "c" on purpose: the
-- function parameter is also called "code", and an unqualified "code" inside
-- the query is ambiguous between the PL/pgSQL variable and the column, which
-- PL/pgSQL rejects at run time under its default variable_conflict setting.
CREATE OR REPLACE FUNCTION seap.cpv_name(code TEXT)
RETURNS TEXT AS $$
DECLARE
    result TEXT;
BEGIN
    SELECT c.name_ro INTO result
    FROM seap.cpv_codes AS c
    WHERE c.code = seap.cpv_normalize($1);
    IF result IS NOT NULL THEN RETURN result; END IF;

    SELECT c.name_ro INTO result
    FROM seap.cpv_codes AS c
    WHERE c.code = seap.cpv_division($1);
    IF result IS NOT NULL THEN RETURN result; END IF;

    RETURN $1;
END;
$$ LANGUAGE plpgsql STABLE;
-- Division-level rows only (level = 1): code, Romanian name and emoji.
CREATE OR REPLACE VIEW seap.cpv_division_lookup AS
SELECT
    cpv.code    AS division_code,
    cpv.name_ro AS division_name,
    cpv.emoji
FROM seap.cpv_codes AS cpv
WHERE cpv.level = 1;
-- Denormalized CPV columns on announcements, so queries can filter on them
-- without joining seap.cpv_codes.
ALTER TABLE seap.announcements
    ADD COLUMN IF NOT EXISTS cpv_division TEXT,
    ADD COLUMN IF NOT EXISTS cpv_name_ro  TEXT;

CREATE INDEX IF NOT EXISTS idx_ann_cpv_division
    ON seap.announcements (cpv_division);
COMMIT;
@@ -0,0 +1,233 @@
-- Risk flags (red flags) for procurement transparency, based on OCP indicators.
-- Idempotent: safe to re-run.
BEGIN;
-- ── risk_flags column on seap.announcements ──
-- JSONB column holding the computed flags; the GIN index is partial and only
-- covers rows whose risk_flags is a non-NULL, non-empty array.
ALTER TABLE seap.announcements ADD COLUMN IF NOT EXISTS risk_flags JSONB;
CREATE INDEX IF NOT EXISTS idx_ann_risk_flags
    ON seap.announcements USING gin (risk_flags)
    WHERE risk_flags IS NOT NULL AND jsonb_array_length(risk_flags) > 0;
-- ── Materialized view: awarded-value statistics per CPV division ──
-- One row per cpv_division with at least 5 positively-valued awards:
-- contract count, median, mean and 95th percentile of awarded_value.
DROP MATERIALIZED VIEW IF EXISTS seap.mv_cpv_median_value CASCADE;
CREATE MATERIALIZED VIEW seap.mv_cpv_median_value AS
SELECT
    a.cpv_division,
    count(*)::int AS contracts,
    (percentile_cont(0.5) WITHIN GROUP (ORDER BY a.awarded_value))::numeric(15,2) AS median_value,
    (avg(a.awarded_value))::numeric(15,2) AS avg_value,
    (percentile_cont(0.95) WITHIN GROUP (ORDER BY a.awarded_value))::numeric(15,2) AS p95_value
FROM seap.announcements AS a
WHERE a.awarded_value IS NOT NULL
  AND a.awarded_value > 0
  AND a.cpv_division IS NOT NULL
GROUP BY a.cpv_division
HAVING count(*) >= 5;

-- Unique index on the grouping key.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_cpv_median_pk
    ON seap.mv_cpv_median_value (cpv_division);
-- ── Materialized view: authority supplier concentration ──
-- For each (authority, calendar year) within the 36 months preceding the
-- refresh, reports the supplier with the largest awarded value and its share
-- of the authority's yearly total. Authority-years whose total is below
-- 100000 are dropped as noise.
DROP MATERIALIZED VIEW IF EXISTS seap.mv_authority_concentration CASCADE;
CREATE MATERIALIZED VIEW seap.mv_authority_concentration AS
WITH yearly_pairs AS (
    -- Awarded value and contract count per (authority, year, supplier).
    SELECT
        a.authority_cui,
        MIN(a.authority_name) AS authority_name,
        EXTRACT(YEAR FROM a.publication_date)::int AS year,
        a.supplier_cui,
        MIN(a.supplier_name) AS supplier_name,
        SUM(a.awarded_value)::numeric(15,2) AS total_value,
        COUNT(*)::int AS contracts
    FROM seap.announcements a
    WHERE a.authority_cui IS NOT NULL
      AND a.supplier_cui IS NOT NULL
      AND a.awarded_value IS NOT NULL
      AND a.awarded_value > 0
      AND a.publication_date IS NOT NULL
      AND a.publication_date >= now() - interval '36 months'
    GROUP BY a.authority_cui, EXTRACT(YEAR FROM a.publication_date), a.supplier_cui
),
yearly_totals AS (
    -- Totals per (authority, year) across all suppliers.
    SELECT
        authority_cui,
        year,
        SUM(total_value) AS year_total,
        SUM(contracts) AS year_contracts
    FROM yearly_pairs
    GROUP BY authority_cui, year
),
ranked AS (
    SELECT
        p.authority_cui,
        p.authority_name,
        p.year,
        p.supplier_cui,
        p.supplier_name,
        p.total_value,
        p.contracts,
        t.year_total,
        t.year_contracts,
        -- supplier_cui breaks ties so that the chosen top supplier is the
        -- same on every refresh when two suppliers have equal totals.
        ROW_NUMBER() OVER (
            PARTITION BY p.authority_cui, p.year
            ORDER BY p.total_value DESC, p.supplier_cui
        ) AS rn,
        (p.total_value / NULLIF(t.year_total, 0))::numeric(6,4) AS share
    FROM yearly_pairs p
    JOIN yearly_totals t USING (authority_cui, year)
)
SELECT
    authority_cui,
    authority_name,
    year,
    supplier_cui AS top_supplier_cui,
    supplier_name AS top_supplier_name,
    total_value AS top_supplier_value,
    contracts AS top_supplier_contracts,
    year_total,
    year_contracts,
    share AS top_supplier_share
FROM ranked
WHERE rn = 1
  AND year_total >= 100000;  -- skip tiny totals (noise)

-- Unique index on the view's key.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_auth_conc_pk
    ON seap.mv_authority_concentration (authority_cui, year);
CREATE INDEX IF NOT EXISTS idx_mv_auth_conc_share
    ON seap.mv_authority_concentration (top_supplier_share DESC NULLS LAST);
-- ── View: single-bidder contracts ──
-- 'ca_notice' announcements that either report exactly one offer or whose
-- details->'all_winners' JSON array has exactly one element.
DROP VIEW IF EXISTS seap.v_single_bidder CASCADE;
CREATE VIEW seap.v_single_bidder AS
SELECT a.*
FROM seap.announcements AS a
WHERE a.type = 'ca_notice'
  AND (
        a.num_offers = 1
        OR (
            a.details IS NOT NULL
            AND jsonb_typeof(a.details->'all_winners') = 'array'
            AND jsonb_array_length(a.details->'all_winners') = 1
        )
  );
-- ── Function: compute risk flags for a single announcement ──
-- Parameters:
--   p_id  seap.announcements.id of the announcement to evaluate.
-- Returns:
--   JSONB array of { code, severity, label, detail? } objects (possibly
--   empty), or NULL when no announcement with that id exists.
-- Flags emitted:
--   single_bidder       (high)    ca_notice with one offer / one winner
--   short_deadline      (medium)  c_notice / rfq_invitation with < 10 days
--                                 between publication and deadline;
--                                 detail = days, rounded to 1 decimal
--   suspicious_savings  (medium)  awarded < 50% of estimated;
--                                 detail = savings in whole percent
--   overprice           (medium)  awarded > 2x the CPV-division median from
--                                 seap.mv_cpv_median_value;
--                                 detail = ratio, rounded to 1 decimal
-- The function only reads data, hence STABLE.
CREATE OR REPLACE FUNCTION seap.compute_announcement_flags(
    p_id BIGINT
) RETURNS JSONB
LANGUAGE plpgsql
STABLE
AS $$
DECLARE
    rec RECORD;
    flags JSONB := '[]'::jsonb;
    v_median NUMERIC;
BEGIN
    SELECT a.type, a.publication_date, a.deadline_submission,
           a.awarded_value, a.estimated_value, a.cpv_division,
           a.num_offers, a.details
      INTO rec
      FROM seap.announcements a
     WHERE a.id = p_id;
    IF NOT FOUND THEN RETURN NULL; END IF;

    -- 1) Single bidder (only meaningful for ca_notice with winner data)
    IF rec.type = 'ca_notice' THEN
        IF rec.num_offers = 1 THEN
            flags := flags || jsonb_build_object(
                'code', 'single_bidder',
                'severity', 'high',
                'label', 'Un singur ofertant'
            );
        -- jsonb_typeof yields NULL for missing details/key, so this branch is
        -- skipped then. The array-length test is nested rather than ANDed
        -- because jsonb_array_length raises on non-arrays and SQL does not
        -- guarantee AND evaluation order.
        ELSIF jsonb_typeof(rec.details->'all_winners') = 'array' THEN
            IF jsonb_array_length(rec.details->'all_winners') = 1 THEN
                flags := flags || jsonb_build_object(
                    'code', 'single_bidder',
                    'severity', 'high',
                    'label', 'Un singur câștigător'
                );
            END IF;
        END IF;
    END IF;

    -- 2) Short deadline (only c_notice / rfq_invitation have submission deadlines)
    IF rec.type IN ('c_notice','rfq_invitation')
       AND rec.publication_date IS NOT NULL
       AND rec.deadline_submission IS NOT NULL
       AND (rec.deadline_submission - rec.publication_date) < interval '10 days' THEN
        flags := flags || jsonb_build_object(
            'code', 'short_deadline',
            'severity', 'medium',
            'label', 'Termen scurt',
            -- days between publication and deadline, 1 decimal
            'detail', round(
                (EXTRACT(EPOCH FROM (rec.deadline_submission - rec.publication_date)) / 86400.0)::numeric,
                1
            )
        );
    END IF;

    -- 3) Suspicious savings: awarded_value < 50% of estimated
    IF rec.awarded_value IS NOT NULL
       AND rec.estimated_value IS NOT NULL
       AND rec.awarded_value > 0
       AND rec.estimated_value > 0
       AND rec.awarded_value < 0.5 * rec.estimated_value THEN
        flags := flags || jsonb_build_object(
            'code', 'suspicious_savings',
            'severity', 'medium',
            'label', 'Economii suspecte',
            'detail', round(100.0 * (1 - rec.awarded_value / rec.estimated_value))::int
        );
    END IF;

    -- 4) Overprice: awarded_value > 2 * median per CPV division
    IF rec.awarded_value IS NOT NULL
       AND rec.awarded_value > 0
       AND rec.cpv_division IS NOT NULL THEN
        SELECT m.median_value INTO v_median
          FROM seap.mv_cpv_median_value m
         WHERE m.cpv_division = rec.cpv_division;
        IF v_median IS NOT NULL AND v_median > 0
           AND rec.awarded_value > 2 * v_median THEN
            flags := flags || jsonb_build_object(
                'code', 'overprice',
                'severity', 'medium',
                'label', 'Peste piață',
                'detail', round((rec.awarded_value / v_median)::numeric, 1)
            );
        END IF;
    END IF;

    RETURN flags;
END;
$$;
-- ── Function: refresh seap.mv_cpv_median_value ──
-- Attempts a CONCURRENTLY refresh first; if that raises any error, falls back
-- to a plain REFRESH. An error raised by the fallback itself propagates to
-- the caller.
CREATE OR REPLACE FUNCTION seap.refresh_risk_views()
RETURNS VOID
LANGUAGE plpgsql
AS $$
BEGIN
    <<try_concurrent>>
    BEGIN
        REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_cpv_median_value;
    EXCEPTION
        WHEN OTHERS THEN
            REFRESH MATERIALIZED VIEW seap.mv_cpv_median_value;
    END try_concurrent;
END;
$$;
-- Refresh seap.mv_authority_concentration: CONCURRENTLY first, with a plain
-- REFRESH as fallback when the concurrent attempt raises any error.
CREATE OR REPLACE FUNCTION seap.refresh_concentration()
RETURNS VOID
LANGUAGE plpgsql
AS $$
BEGIN
    <<try_concurrent>>
    BEGIN
        REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_authority_concentration;
    EXCEPTION
        WHEN OTHERS THEN
            REFRESH MATERIALIZED VIEW seap.mv_authority_concentration;
    END try_concurrent;
END;
$$;
COMMIT;
-- Initial population, executed after COMMIT (outside the transaction above).
-- The two refreshes are independent of each other.
REFRESH MATERIALIZED VIEW seap.mv_authority_concentration;
REFRESH MATERIALIZED VIEW seap.mv_cpv_median_value;
+94
View File
@@ -0,0 +1,94 @@
-- Per-UAT KPI materialized view powering /harta v2 multi-metric choropleth.
-- Columns:
-- total_contracts, total_value, distinct_suppliers
-- direct_pct — share of value awarded via direct procurement (type='da')
-- framework_pct — share via framework agreements
-- hhi_suppliers — Herfindahl-Hirschman index 0..10000 (DOJ thresholds: <1500 ok, 1500-2500 moderate, >2500 concentrated)
-- top_supplier_share — biggest single-supplier dependency 0..1
-- q4_spike — Q4 value / (yearly_avg_quarter) for last full year; >1.5 = spike, NULL if no data
--
-- Refresh: weekly cron — REFRESH MATERIALIZED VIEW CONCURRENTLY seap.uat_kpi;
-- Idempotent: safe to re-run.
BEGIN;
-- Rebuild the per-UAT KPI view from scratch (dependents are dropped too).
DROP MATERIALIZED VIEW IF EXISTS seap.uat_kpi CASCADE;
CREATE MATERIALIZED VIEW seap.uat_kpi AS
WITH src AS (
    -- Announcements whose authority has a SIRUTA code.
    SELECT
        ann.authority_siruta AS siruta,
        ann.supplier_cui,
        ann.type,
        ann.awarded_value,
        ann.publication_date,
        ann.framework_agreement
    FROM seap.announcements AS ann
    WHERE ann.authority_siruta IS NOT NULL
),
totals AS (
    -- Counts and value sums per UAT, split by direct ('da') and framework.
    SELECT
        s.siruta,
        COUNT(*)::int AS total_contracts,
        COALESCE(SUM(s.awarded_value), 0)::numeric(20,2) AS total_value,
        COALESCE(SUM(s.awarded_value) FILTER (WHERE s.type = 'da'), 0)::numeric(20,2) AS direct_value,
        COALESCE(SUM(s.awarded_value) FILTER (WHERE s.framework_agreement = true), 0)::numeric(20,2) AS framework_value,
        COUNT(DISTINCT s.supplier_cui)::int AS distinct_suppliers
    FROM src AS s
    GROUP BY s.siruta
),
shares AS (
    -- Each supplier's fraction of the UAT's awarded value.
    SELECT
        s.siruta,
        s.supplier_cui,
        SUM(s.awarded_value)
            / NULLIF(SUM(SUM(s.awarded_value)) OVER (PARTITION BY s.siruta), 0) AS ratio
    FROM src AS s
    WHERE s.supplier_cui IS NOT NULL
      AND s.awarded_value IS NOT NULL
    GROUP BY s.siruta, s.supplier_cui
),
concentration AS (
    -- HHI = sum of squared shares scaled to 0..10000, plus the largest share.
    SELECT
        sh.siruta,
        COALESCE(SUM(POWER(sh.ratio, 2)) * 10000, 0) AS hhi,
        COALESCE(MAX(sh.ratio), 0) AS top_supplier_share
    FROM shares AS sh
    GROUP BY sh.siruta
),
prev_year AS (
    -- Q4 and whole-year value for the calendar year before the refresh.
    SELECT
        s.siruta,
        COALESCE(SUM(s.awarded_value) FILTER (WHERE extract(quarter FROM s.publication_date) = 4), 0)::numeric AS q4_value,
        COALESCE(SUM(s.awarded_value), 0)::numeric AS yearly_value
    FROM src AS s
    WHERE extract(year FROM s.publication_date) = extract(year FROM now()) - 1
    GROUP BY s.siruta
)
SELECT
    t.siruta,
    t.total_contracts,
    t.total_value,
    t.distinct_suppliers,
    CASE WHEN t.total_value > 0 THEN t.direct_value / t.total_value ELSE 0 END AS direct_pct,
    CASE WHEN t.total_value > 0 THEN t.framework_value / t.total_value ELSE 0 END AS framework_pct,
    COALESCE(c.hhi, 0)::numeric(10,2) AS hhi_suppliers,
    COALESCE(c.top_supplier_share, 0)::numeric(8,4) AS top_supplier_share,
    -- Q4 value relative to the average quarter of that year; NULL without data.
    CASE WHEN py.yearly_value > 0 THEN py.q4_value / (py.yearly_value / 4) ELSE NULL END AS q4_spike
FROM totals AS t
LEFT JOIN concentration AS c ON c.siruta = t.siruta
LEFT JOIN prev_year AS py ON py.siruta = t.siruta;

-- Unique key index, plus ranking indexes for the value / direct / HHI metrics.
CREATE UNIQUE INDEX IF NOT EXISTS idx_uat_kpi_pk
    ON seap.uat_kpi (siruta);
CREATE INDEX IF NOT EXISTS idx_uat_kpi_value
    ON seap.uat_kpi (total_value DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_uat_kpi_direct
    ON seap.uat_kpi (direct_pct DESC)
    WHERE total_contracts > 5;
CREATE INDEX IF NOT EXISTS idx_uat_kpi_hhi
    ON seap.uat_kpi (hhi_suppliers DESC)
    WHERE total_contracts > 5;
COMMIT;
-- Refresh helper: runs a CONCURRENTLY refresh of seap.uat_kpi.
-- Unlike the risk-view helpers there is no non-concurrent fallback here.
CREATE OR REPLACE FUNCTION seap.refresh_uat_kpi()
RETURNS void
LANGUAGE sql
AS $$
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.uat_kpi;
$$;
@@ -0,0 +1,58 @@
-- Full-text search infrastructure for /api/cauta and /achizitii/cauta.
-- Uses 'simple' config + unaccent for diacritic-insensitive matching, since RO
-- doesn't have a built-in PG text search config and we don't want stemming bias.
--
-- Idempotent: safe to re-run.
BEGIN;
-- The unaccent extension provides public.unaccent(), used below.
CREATE EXTENSION IF NOT EXISTS unaccent;

-- IMMUTABLE wrapper around unaccent so the call can appear in expression
-- indexes / generated columns. The dictionary is named explicitly
-- ('public.unaccent'); declaring the wrapper immutable is considered safe here
-- because the dictionary is not reloaded at runtime.
CREATE OR REPLACE FUNCTION seap.immutable_unaccent(text)
RETURNS text
LANGUAGE sql
IMMUTABLE STRICT PARALLEL SAFE
AS $$ SELECT public.unaccent('public.unaccent', $1) $$;
-- search_tsv is an ordinary tsvector column; the trigger function below fills it.
ALTER TABLE seap.announcements
    ADD COLUMN IF NOT EXISTS search_tsv tsvector;

-- Trigger function: builds NEW.search_tsv from the row's text fields using the
-- 'simple' configuration on unaccented text. Weights:
--   A = title, B = description, C = authority_name and supplier_name,
--   D = cpv_name_ro and cpv_name.
-- NULL fields contribute an empty string.
CREATE OR REPLACE FUNCTION seap.update_search_tsv() RETURNS trigger
LANGUAGE plpgsql AS $$
DECLARE
    doc tsvector;
BEGIN
    doc := setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.title, ''))), 'A');
    doc := doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.description, ''))), 'B');
    doc := doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.authority_name, ''))), 'C');
    doc := doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.supplier_name, ''))), 'C');
    doc := doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.cpv_name_ro, ''))), 'D');
    doc := doc || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(NEW.cpv_name, ''))), 'D');
    NEW.search_tsv := doc;
    RETURN NEW;
END $$;
-- (Re)create the row-level trigger that maintains search_tsv. It fires before
-- INSERT, and before UPDATE only when one of the indexed source columns is
-- named in the UPDATE's SET list.
DROP TRIGGER IF EXISTS trg_announcements_search_tsv ON seap.announcements;
CREATE TRIGGER trg_announcements_search_tsv
    BEFORE INSERT
        OR UPDATE OF title, description, authority_name, supplier_name, cpv_name_ro, cpv_name
    ON seap.announcements
    FOR EACH ROW
    EXECUTE FUNCTION seap.update_search_tsv();
-- Trigram GIN index on title only, for "starts-with" / substring autocomplete.
CREATE INDEX IF NOT EXISTS idx_ann_title_trgm
    ON seap.announcements USING gin (title gin_trgm_ops);
-- GIN index backing full-text queries on search_tsv.
CREATE INDEX IF NOT EXISTS idx_ann_search_tsv
    ON seap.announcements USING gin (search_tsv);
COMMIT;
-- Backfill rows that predate the trigger (run outside the transaction).
-- Long-running on ~642K rows, but does not block reads. Only rows whose
-- search_tsv is still NULL are touched, so re-running skips rows already
-- filled. The expression mirrors seap.update_search_tsv().
UPDATE seap.announcements AS a
   SET search_tsv =
          setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(a.title, ''))), 'A')
       || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(a.description, ''))), 'B')
       || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(a.authority_name, ''))), 'C')
       || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(a.supplier_name, ''))), 'C')
       || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(a.cpv_name_ro, ''))), 'D')
       || setweight(to_tsvector('simple', seap.immutable_unaccent(coalesce(a.cpv_name, ''))), 'D')
 WHERE a.search_tsv IS NULL;
@@ -0,0 +1,165 @@
-- Materialized views for slow /achizitii/retete pages.
-- Refresh nightly via vreaudigital-mvs.timer.
BEGIN;
-- ──────────────────────────────────────────────────────────────────────
-- mv_top_cpv_divisions: powers /retete/top-categorii-bani + cpv-directe-mari
-- Per CPV division: contract count, total and direct ('da') awarded value,
-- distinct authorities/suppliers, and the direct share of total value.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_top_cpv_divisions CASCADE;
CREATE MATERIALIZED VIEW seap.mv_top_cpv_divisions AS
WITH per_division AS (
    SELECT
        a.cpv_division,
        c.name_ro AS cpv_name,
        c.emoji,
        COUNT(*)::int AS contracts,
        SUM(a.awarded_value) AS raw_total,
        COALESCE(SUM(a.awarded_value) FILTER (WHERE a.type = 'da'), 0) AS raw_direct,
        COUNT(DISTINCT a.authority_cui)::int AS distinct_authorities,
        COUNT(DISTINCT a.supplier_cui)::int AS distinct_suppliers
    FROM seap.announcements AS a
    LEFT JOIN seap.cpv_codes AS c
        ON c.code = a.cpv_division
    WHERE a.cpv_division IS NOT NULL
      AND a.awarded_value IS NOT NULL
    GROUP BY a.cpv_division, c.name_ro, c.emoji
)
SELECT
    d.cpv_division,
    d.cpv_name,
    d.emoji,
    d.contracts,
    COALESCE(d.raw_total, 0)::numeric(20,2) AS total_value,
    d.raw_direct::numeric(20,2) AS direct_value,
    d.distinct_authorities,
    d.distinct_suppliers,
    (CASE
        WHEN COALESCE(d.raw_total, 0) > 0 THEN d.raw_direct / d.raw_total
        ELSE 0
     END)::numeric(8,4) AS direct_pct
FROM per_division AS d;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_top_cpv_div_pk
    ON seap.mv_top_cpv_divisions (cpv_division);
CREATE INDEX IF NOT EXISTS idx_mv_top_cpv_div_value
    ON seap.mv_top_cpv_divisions (total_value DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_top_cpv_div_directpct
    ON seap.mv_top_cpv_divisions (direct_pct DESC)
    WHERE total_value >= 100000000;
-- ──────────────────────────────────────────────────────────────────────
-- mv_top_suppliers: powers /retete/top-firme-castigatoare + firme-multe-judete
-- One row per normalized supplier CUI (upper-cased, leading "RO" and all
-- whitespace removed) with a positive total awarded value.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_top_suppliers CASCADE;
CREATE MATERIALIZED VIEW seap.mv_top_suppliers AS
WITH awarded AS (
    -- Awarded announcements with the supplier CUI normalized once.
    SELECT
        regexp_replace(upper(a.supplier_cui), '(^RO)|\s+', '', 'g') AS cui_norm,
        a.supplier_name,
        a.authority_cui,
        a.awarded_value
    FROM seap.announcements AS a
    WHERE a.supplier_cui IS NOT NULL
      AND a.awarded_value IS NOT NULL
),
agg AS (
    SELECT
        w.cui_norm,
        MIN(w.supplier_name) AS name,
        MIN(sup_loc.county) AS county,
        COUNT(*)::int AS contracts,
        COALESCE(SUM(w.awarded_value), 0)::numeric(20,2) AS total_value,
        COUNT(DISTINCT w.authority_cui)::int AS distinct_buyers,
        -- number of distinct counties among the buying authorities
        COUNT(DISTINCT auth_loc.county)::int AS county_count
    FROM awarded AS w
    LEFT JOIN seap.cui_location AS sup_loc
        ON sup_loc.cui = w.cui_norm
    LEFT JOIN seap.cui_location AS auth_loc
        ON auth_loc.cui = w.authority_cui
    GROUP BY w.cui_norm
)
SELECT
    agg.cui_norm,
    agg.name,
    agg.county,
    agg.contracts,
    agg.total_value,
    agg.distinct_buyers,
    agg.county_count
FROM agg
WHERE agg.total_value > 0;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_top_suppliers_pk
    ON seap.mv_top_suppliers (cui_norm);
CREATE INDEX IF NOT EXISTS idx_mv_top_suppliers_value
    ON seap.mv_top_suppliers (total_value DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_top_suppliers_counties
    ON seap.mv_top_suppliers (county_count DESC NULLS LAST);
-- ──────────────────────────────────────────────────────────────────────
-- mv_top_authorities: powers /retete/top-autoritati-cheltuitori
-- One row per authority CUI over announcements with an awarded value:
-- representative name/type, location from seap.cui_location, and totals.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_top_authorities CASCADE;
CREATE MATERIALIZED VIEW seap.mv_top_authorities AS
SELECT
    ann.authority_cui,
    MIN(ann.authority_name) AS name,
    MIN(loc.county) AS county,
    MIN(ann.authority_type) AS authority_type,
    MIN(loc.siruta) AS siruta,
    COUNT(*)::int AS contracts,
    COALESCE(SUM(ann.awarded_value), 0)::numeric(20,2) AS total_value,
    COUNT(DISTINCT ann.supplier_cui)::int AS distinct_suppliers
FROM seap.announcements AS ann
LEFT JOIN seap.cui_location AS loc
    ON loc.cui = ann.authority_cui
WHERE ann.authority_cui IS NOT NULL
  AND ann.awarded_value IS NOT NULL
GROUP BY ann.authority_cui;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_top_auth_pk
    ON seap.mv_top_authorities (authority_cui);
CREATE INDEX IF NOT EXISTS idx_mv_top_auth_value
    ON seap.mv_top_authorities (total_value DESC NULLS LAST);
-- ──────────────────────────────────────────────────────────────────────
-- mv_recurrent_pairs: powers /retete/perechi-recurente
-- (authority, normalized supplier CUI) pairs with at least 5 awarded
-- announcements, with totals and the first/last publication year.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_recurrent_pairs CASCADE;
CREATE MATERIALIZED VIEW seap.mv_recurrent_pairs AS
WITH awarded AS (
    -- Awarded announcements with both parties known; supplier CUI normalized.
    SELECT
        a.authority_cui,
        a.authority_name,
        regexp_replace(upper(a.supplier_cui), '(^RO)|\s+', '', 'g') AS supplier_cui_norm,
        a.supplier_name,
        a.awarded_value,
        a.publication_date
    FROM seap.announcements AS a
    WHERE a.authority_cui IS NOT NULL
      AND a.supplier_cui IS NOT NULL
      AND a.awarded_value IS NOT NULL
)
SELECT
    w.authority_cui,
    MIN(w.authority_name) AS authority_name,
    w.supplier_cui_norm,
    MIN(w.supplier_name) AS supplier_name,
    MIN(loc.county) AS county,
    COUNT(*)::int AS contracts,
    COALESCE(SUM(w.awarded_value), 0)::numeric(20,2) AS total_value,
    MIN(EXTRACT(YEAR FROM w.publication_date))::int AS first_year,
    MAX(EXTRACT(YEAR FROM w.publication_date))::int AS last_year
FROM awarded AS w
LEFT JOIN seap.cui_location AS loc
    ON loc.cui = w.authority_cui
GROUP BY w.authority_cui, w.supplier_cui_norm
HAVING COUNT(*) >= 5;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_recurr_pk
    ON seap.mv_recurrent_pairs (authority_cui, supplier_cui_norm);
CREATE INDEX IF NOT EXISTS idx_mv_recurr_value
    ON seap.mv_recurrent_pairs (total_value DESC NULLS LAST);
-- ──────────────────────────────────────────────────────────────────────
-- mv_supplier_cpv_share: powers /retete/firme-specializate-extrem
-- For every supplier with at least 5,000,000 total awarded value, the single
-- CPV division that carries the most value and its share of that total.
-- ──────────────────────────────────────────────────────────────────────
DROP MATERIALIZED VIEW IF EXISTS seap.mv_supplier_cpv_share CASCADE;
CREATE MATERIALIZED VIEW seap.mv_supplier_cpv_share AS
WITH supplier_cpv AS (
    -- Value per (normalized supplier CUI, CPV division).
    SELECT
        regexp_replace(upper(a.supplier_cui), '(^RO)|\s+', '', 'g') AS cui,
        MIN(a.supplier_name) AS name,
        a.cpv_division,
        MIN(c.name_ro) AS cpv_name,
        MIN(c.emoji) AS emoji,
        COUNT(*)::int AS contracts,
        COALESCE(SUM(a.awarded_value), 0)::numeric(20,2) AS cpv_value
    FROM seap.announcements a
    LEFT JOIN seap.cpv_codes c ON c.code = a.cpv_division
    WHERE a.supplier_cui IS NOT NULL
      AND a.cpv_division IS NOT NULL
      AND a.awarded_value IS NOT NULL
    GROUP BY
        regexp_replace(upper(a.supplier_cui), '(^RO)|\s+', '', 'g'),
        a.cpv_division
),
supplier_total AS (
    -- Only suppliers above the value threshold are kept.
    SELECT cui, SUM(cpv_value) AS total
    FROM supplier_cpv
    GROUP BY cui
    HAVING SUM(cpv_value) >= 5000000
),
ranked AS (
    SELECT
        sc.cui, sc.name, sc.cpv_division, sc.cpv_name, sc.emoji,
        sc.contracts, sc.cpv_value,
        st.total,
        (sc.cpv_value / st.total)::numeric(8,4) AS share,
        -- cpv_division breaks ties so the chosen top division is the same on
        -- every refresh when two divisions carry equal value.
        ROW_NUMBER() OVER (
            PARTITION BY sc.cui
            ORDER BY sc.cpv_value DESC, sc.cpv_division
        ) AS rn
    FROM supplier_cpv sc
    JOIN supplier_total st ON st.cui = sc.cui
)
-- Column list (including rn) matches what "SELECT *" produced before.
SELECT
    cui, name, cpv_division, cpv_name, emoji,
    contracts, cpv_value, total, share, rn
FROM ranked
WHERE rn = 1;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_sup_cpv_pk
    ON seap.mv_supplier_cpv_share (cui);
CREATE INDEX IF NOT EXISTS idx_mv_sup_cpv_share
    ON seap.mv_supplier_cpv_share (share DESC, total DESC);
COMMIT;
-- Refresh helper: refreshes the five recipe materialized views CONCURRENTLY,
-- one after another, in the order listed. There is no non-concurrent
-- fallback; an error in any statement aborts the call.
CREATE OR REPLACE FUNCTION seap.refresh_recipe_mvs()
RETURNS void
LANGUAGE sql
AS $$
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_cpv_divisions;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_suppliers;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_top_authorities;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_recurrent_pairs;
    REFRESH MATERIALIZED VIEW CONCURRENTLY seap.mv_supplier_cpv_share;
$$;
+161
View File
@@ -0,0 +1,161 @@
-- Firms registry — extends seap.cui_location with full ONRC + ANAF data
-- for ALL Romanian companies (~1.5M), not just those active in SEAP.
--
-- Sources:
-- ONRC bulk on data.gov.ro (CC-BY 4.0): COD_INMATRICULARE-keyed CSV files
-- ANAF webservicesp v9: per-CUI enrichment (status, address, contacts)
-- Photon (Komoot) self-hosted: address → lat/lng geocoding
--
-- Idempotent: safe to re-run.
BEGIN;
CREATE SCHEMA IF NOT EXISTS firms;

-- ──────────────────────────────────────────────────────────────────
-- firms.entities: master registry, one row per CUI (primary key).
-- ──────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS firms.entities (
    -- Identity
    cui                  TEXT PRIMARY KEY,
    cod_inmatriculare    TEXT,                     -- ONRC registration code, e.g. J40/630/1992; nullable
    euid                 TEXT,                     -- European identifier
    name                 TEXT NOT NULL,
    forma_juridica       TEXT,                     -- legal form: SRL, SA, PFA, II, IF, etc.

    -- Address (parsed from ONRC)
    adr_tara             TEXT,
    adr_judet            TEXT,
    adr_localitate       TEXT,
    adr_strada           TEXT,
    adr_numar            TEXT,
    adr_bloc             TEXT,
    adr_scara            TEXT,
    adr_etaj             TEXT,
    adr_apartament       TEXT,
    adr_cod_postal       TEXT,
    adr_sector           TEXT,
    adr_completare       TEXT,                     -- raw appendix
    adr_full             TEXT,                     -- concatenated form, used as the geocoding query
    siruta               TEXT,                     -- matched UAT siruta (joined with GisUat)

    -- Geolocation
    lat                  DOUBLE PRECISION,
    lng                  DOUBLE PRECISION,
    geom                 GEOGRAPHY(POINT, 4326),
    geocode_source       TEXT,                     -- 'photon', 'nominatim', 'siruta_centroid', 'manual'
    geocode_score        REAL,                     -- confidence, 0..1

    -- Registration
    data_inmatriculare   DATE,
    registration_year    INT,

    -- Status (from ANAF v9 + ONRC stare_firma)
    is_active_anaf       BOOLEAN,                  -- NULL = unknown, true = active, false = on ANAF's inactive-taxpayer list
    is_radiated_onrc     BOOLEAN,                  -- ONRC stare_firma RADIATA (struck off)
    is_vat_registered    BOOLEAN,                  -- ANAF scpTVA active
    is_efactura          BOOLEAN,                  -- ANAF statusRO_e_Factura
    status_text          TEXT,                     -- decoded, human-readable: "Activă", "Radiată", "Insolvență", etc.

    -- Contact (best-effort, often NULL)
    phone                TEXT,
    fax                  TEXT,
    web                  TEXT,                     -- from ONRC OD_FIRME.CSV.WEB column

    -- Activity classification
    caen_principal       TEXT,                     -- CAEN code from ANAF
    caen_autorizate      TEXT[],                   -- aggregated from the multi-row OD_CAEN_AUTORIZAT.CSV

    -- Foreign parent
    tara_firma_mama      TEXT,                     -- from ONRC OD_FIRME.CSV.TARA_FIRMA_MAMA

    -- Ownership / management (from ONRC reprezentanti)
    rep_legali           JSONB,                    -- [{persoana, calitate, judet_localitate, tara}, ...]

    -- Metadata
    source_onrc_dataset  TEXT,                     -- e.g. 'firme-03-04-2026'
    anaf_fetched_at      TIMESTAMPTZ,
    onrc_fetched_at      TIMESTAMPTZ,
    geocoded_at          TIMESTAMPTZ,
    created_at           TIMESTAMPTZ DEFAULT now(),
    updated_at           TIMESTAMPTZ DEFAULT now()
);
-- Secondary indexes on firms.entities.
-- Partial indexes skip NULL keys; idx_firms_active covers only rows that are
-- ANAF-active and not (known to be) struck off at ONRC.
CREATE INDEX IF NOT EXISTS idx_firms_cod_inmatriculare
    ON firms.entities (cod_inmatriculare)
    WHERE cod_inmatriculare IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_firms_county
    ON firms.entities (adr_judet);
CREATE INDEX IF NOT EXISTS idx_firms_siruta
    ON firms.entities (siruta)
    WHERE siruta IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_firms_caen_principal
    ON firms.entities (caen_principal);
CREATE INDEX IF NOT EXISTS idx_firms_geom
    ON firms.entities USING gist (geom);
CREATE INDEX IF NOT EXISTS idx_firms_name_trgm
    ON firms.entities USING gin (name gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_firms_active
    ON firms.entities (is_active_anaf, is_radiated_onrc)
    WHERE is_active_anaf = true AND (is_radiated_onrc = false OR is_radiated_onrc IS NULL);
-- ──────────────────────────────────────────────────────────────────
-- Staging tables for raw ONRC CSV imports (truncated each refresh).
-- All columns are untyped TEXT and carry no constraints.
-- ──────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS firms.staging_onrc_firme (
    denumire            TEXT,
    cui                 TEXT,
    cod_inmatriculare   TEXT,
    data_inmatriculare  TEXT,   -- YYYY-MM-DD, or empty
    euid                TEXT,
    forma_juridica      TEXT,
    adr_tara            TEXT,
    adr_judet           TEXT,
    adr_localitate      TEXT,
    adr_strada          TEXT,
    adr_numar           TEXT,
    adr_bloc            TEXT,
    adr_scara           TEXT,
    adr_etaj            TEXT,
    adr_apartament      TEXT,
    adr_cod_postal      TEXT,
    adr_sector          TEXT,
    adr_completare      TEXT,
    web                 TEXT,
    tara_firma_mama     TEXT
);
-- Staging: raw CAEN rows keyed by cod_inmatriculare
-- (several rows per firm are possible).
CREATE TABLE IF NOT EXISTS firms.staging_onrc_caen (
    cod_inmatriculare  TEXT,
    cod_caen           TEXT,
    ver_caen           TEXT
);
-- Staging: raw firm-status rows (status code per cod_inmatriculare).
CREATE TABLE IF NOT EXISTS firms.staging_onrc_stare (
    cod_inmatriculare  TEXT,
    cod_stare          TEXT
);
-- Staging: raw legal-representative rows keyed by cod_inmatriculare.
CREATE TABLE IF NOT EXISTS firms.staging_onrc_reprezentanti (
    cod_inmatriculare   TEXT,
    persoana            TEXT,
    calitate            TEXT,
    data_nastere        TEXT,
    localitate_nastere  TEXT,
    judet_nastere       TEXT,
    tara_nastere        TEXT,
    localitate          TEXT,
    judet               TEXT,
    tara                TEXT
);
-- ──────────────────────────────────────────────────────────────────
-- Firm-status codelist: a short, manually maintained mapping from status
-- code to its Romanian label. Seeding is idempotent: rows whose code already
-- exists are left untouched.
-- ──────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS firms.stare_codelist (
    cod    TEXT PRIMARY KEY,
    label  TEXT NOT NULL
);

INSERT INTO firms.stare_codelist (cod, label)
VALUES
    ('1', 'Activă'),
    ('2', 'Suspendată activitate'),
    ('3', 'Dizolvare'),
    ('4', 'Radiată'),
    ('5', 'În lichidare'),
    ('6', 'Insolvență'),
    ('7', 'Reorganizare judiciară'),
    ('8', 'Faliment'),
    ('9', 'Întreruptă activitate')
ON CONFLICT (cod) DO NOTHING;
COMMIT;
@@ -0,0 +1,75 @@
-- Financial indicators per firm-year, from Ministerul Finanțelor "Situații financiare"
-- annual datasets on data.gov.ro (CC-BY 4.0).
--
-- 21 indicators (I1-I20 + CAEN) extracted from balance sheet + P&L + headcount.
-- Schema covers years 2020-2024 initially; older years available too if needed.
BEGIN;
-- firms.financials: one row per (cui, year) with the indicators I1..I20.
-- The trailing comment on each column names the source indicator.
CREATE TABLE IF NOT EXISTS firms.financials (
    cui                 TEXT NOT NULL,
    year                INT  NOT NULL,
    caen                TEXT,

    -- Balance sheet: assets
    active_imobilizate  NUMERIC(20,2),   -- I1
    active_circulante   NUMERIC(20,2),   -- I2
    stocuri             NUMERIC(20,2),   -- I3
    creante             NUMERIC(20,2),   -- I4
    casa_banci          NUMERIC(20,2),   -- I5
    cheltuieli_avans    NUMERIC(20,2),   -- I6

    -- Balance sheet: liabilities / equity
    datorii             NUMERIC(20,2),   -- I7
    venituri_avans      NUMERIC(20,2),   -- I8
    provizioane         NUMERIC(20,2),   -- I9
    capitaluri_total    NUMERIC(20,2),   -- I10
    capital_subscris    NUMERIC(20,2),   -- I11
    patrimoniul_regiei  NUMERIC(20,2),   -- I12

    -- Profit and loss account
    cifra_afaceri       NUMERIC(20,2),   -- I13 (net turnover)
    venituri_total      NUMERIC(20,2),   -- I14
    cheltuieli_total    NUMERIC(20,2),   -- I15
    profit_brut         NUMERIC(20,2),   -- I16
    pierdere_bruta      NUMERIC(20,2),   -- I17
    profit_net          NUMERIC(20,2),   -- I18
    pierdere_neta       NUMERIC(20,2),   -- I19

    -- Headcount
    numar_salariati     BIGINT,          -- I20 (BIGINT: some data anomalies need the wider range)

    -- Metadata
    source              TEXT DEFAULT 'mfinante.data.gov.ro',
    fetched_at          TIMESTAMPTZ DEFAULT now(),

    PRIMARY KEY (cui, year)
);
-- Secondary indexes on firms.financials: plain lookups by cui / year / caen,
-- and per-year descending rankings by turnover, net profit and headcount.
CREATE INDEX IF NOT EXISTS idx_fin_cui
    ON firms.financials (cui);
CREATE INDEX IF NOT EXISTS idx_fin_year
    ON firms.financials (year);
CREATE INDEX IF NOT EXISTS idx_fin_caen
    ON firms.financials (caen);
CREATE INDEX IF NOT EXISTS idx_fin_ca_desc
    ON firms.financials (year, cifra_afaceri DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_fin_profit_desc
    ON firms.financials (year, profit_net DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_fin_salariati_desc
    ON firms.financials (year, numar_salariati DESC NULLS LAST);
-- Materialized view: for each CUI, the firms.financials row with the highest
-- year (DISTINCT ON keeps the first row per cui under the ORDER BY).
-- Created only if absent, so re-running the script does not rebuild it.
CREATE MATERIALIZED VIEW IF NOT EXISTS firms.mv_financials_latest AS
SELECT DISTINCT ON (f.cui)
    f.*
FROM firms.financials AS f
WHERE f.cui IS NOT NULL
ORDER BY f.cui, f.year DESC;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_fin_latest_pk
    ON firms.mv_financials_latest (cui);
-- Staging table for raw CSV imports: cui, caen and the unnamed indicators
-- i1..i20, without a year column or any constraints.
CREATE TABLE IF NOT EXISTS firms.staging_financials (
    cui   TEXT,
    caen  TEXT,
    i1    NUMERIC,
    i2    NUMERIC,
    i3    NUMERIC,
    i4    NUMERIC,
    i5    NUMERIC,
    i6    NUMERIC,
    i7    NUMERIC,
    i8    NUMERIC,
    i9    NUMERIC,
    i10   NUMERIC,
    i11   NUMERIC,
    i12   NUMERIC,
    i13   NUMERIC,
    i14   NUMERIC,
    i15   NUMERIC,
    i16   NUMERIC,
    i17   NUMERIC,
    i18   NUMERIC,
    i19   NUMERIC,
    i20   NUMERIC
);
COMMIT;
@@ -0,0 +1,46 @@
-- 014_firms_postal_codes.sql
-- GeoNames RO postal codes (37915 entries, CC-BY 4.0).
-- Used for fast batch geocoding of firms.entities at postal-code precision
-- — covers ~2.07M firms (52%) with adr_cod_postal populated.
-- Source: https://download.geonames.org/export/zip/RO.zip
-- Refresh: yearly via cron (data updates ~yearly per GeoNames).
-- firms.postal_codes: postal code reference rows with coordinates.
-- A postal code may map to several places, hence the composite key.
CREATE TABLE IF NOT EXISTS firms.postal_codes (
    postal_code  text          NOT NULL,
    place_name   text          NOT NULL,
    county       text,
    county_code  text,
    admin2_code  text,
    admin3_code  text,
    admin3_name  text,
    lat          numeric(9,6)  NOT NULL,
    lng          numeric(9,6)  NOT NULL,
    accuracy     int,
    PRIMARY KEY (postal_code, place_name)
);
-- One row per postal code. When several places share a code, keep the one
-- with the most precise coordinates. In the GeoNames postal-code export the
-- accuracy field runs from 1 = estimated up to 6 = centroid of addresses or
-- shape, so a HIGHER value is more precise; rows are therefore ranked by
-- accuracy descending. Rows without an accuracy value sort last, and
-- place_name is the final, deterministic tiebreaker.
CREATE OR REPLACE VIEW firms.postal_codes_best AS
SELECT DISTINCT ON (postal_code)
    postal_code, place_name, county, county_code, lat, lng, accuracy
FROM firms.postal_codes
ORDER BY postal_code, accuracy DESC NULLS LAST, place_name;

CREATE INDEX IF NOT EXISTS idx_postal_codes_postal ON firms.postal_codes(postal_code);
-- Staging table for COPY: columns follow the GeoNames TSV layout in order,
-- all as unconstrained text (lat/lng/accuracy are converted later).
CREATE TABLE IF NOT EXISTS firms.staging_postal_codes (
    country_code  text,
    postal_code   text,
    place_name    text,
    admin1_name   text,
    admin1_code   text,
    admin2_name   text,
    admin2_code   text,
    admin3_name   text,
    admin3_code   text,
    lat           text,
    lng           text,
    accuracy      text
);
@@ -0,0 +1,32 @@
-- 015_firms_onrc_extras.sql
-- Two additional ONRC bulk CSVs we weren't importing yet:
-- 1. od_reprezentanti_if.csv — administrators of "Întreprinderi Familiale"
-- (~80K rows). The persoană field plus locality+county of birth gives us
-- a separate small "owner registry" parallel to rep_legali on firms.entities.
-- 2. od_sucursale_alte_state_membre.csv — branches of RO companies registered
-- in other EU states (~tiny, ~hundreds of rows). Useful for follow-the-money
-- questions like "RO firm with EU branches winning EU-funded contracts".
--
-- Both are keyed by cod_inmatriculare which we already have on firms.entities,
-- so JOINs are trivial. Idempotent: TRUNCATE-and-reload on each ONRC snapshot.
-- firms.reprezentanti_if: representatives from od_reprezentanti_if.csv, keyed
-- by cod_inmatriculare (no primary key; several rows per firm are allowed).
CREATE TABLE IF NOT EXISTS firms.reprezentanti_if (
    cod_inmatriculare   text NOT NULL,
    nume                text,
    data_nastere        text,   -- raw DD.MM.YYYY string as delivered by ONRC
    localitate_nastere  text,
    judet_nastere       text,
    tara_nastere        text,
    calitate            text
);

CREATE INDEX IF NOT EXISTS idx_rep_if_cod
    ON firms.reprezentanti_if (cod_inmatriculare);
-- firms.sucursale_ue: branches from od_sucursale_alte_state_membre.csv, keyed
-- by the parent firm's cod_inmatriculare (no primary key).
CREATE TABLE IF NOT EXISTS firms.sucursale_ue (
    cod_inmatriculare   text NOT NULL,
    tip_unitate         text,   -- usually "Sucursală"
    denumire_sucursala  text,
    euid                text,
    cod_fiscal_strain   text,   -- ONRC names the field COD_FISCAL, but it holds the foreign fiscal code
    tara                text    -- destination country
);

CREATE INDEX IF NOT EXISTS idx_sucursale_ue_cod
    ON firms.sucursale_ue (cod_inmatriculare);
@@ -0,0 +1,97 @@
-- 016_firms_financials_categories.sql
-- Separate tables for the non-WEB_UU/BL_BS_SL MFP financial categories.
-- Schemas differ enough between categories (ONG=46 indicators with separate
-- non-profit + economic activity tracking; banks=23 IFRS-specific indicators)
-- that lumping them with firms.financials would distort the existing schema.
--
-- We store raw indicators in JSONB to avoid 46-column tables and to absorb
-- future ANAF schema tweaks without migrations. A handful of canonical
-- columns lifted out of JSONB for fast querying / recipe filtering.
-- ─── ONG (Asociații, Fundații) ────────────────────────────────────────────
-- Source: WEB_ONG_AN20XX.txt on data.gov.ro situatii_financiare_<year>
-- Schema: CUI, CAEN, CAENO, i1..i46
-- i12 = Capitaluri proprii
-- i37 = Venituri totale (prevederi anuale)
-- i38 = Venituri totale (realizat la 31.12)
-- i39 = Cheltuieli totale (prevederi anuale)
-- i40 = Cheltuieli totale (realizat la 31.12)
-- i41 = Excedent / Profit (prevederi anuale)
-- i42 = Excedent / Profit (realizat la 31.12)
-- i45 = Personal activități fără scop patrimonial
-- i46 = Personal activități economice
-- Yearly financial statements of NGOs (associations, foundations): one row
-- per (cui, year). A few indicators are lifted into typed columns; the full
-- set is kept in `indicators`.
CREATE TABLE IF NOT EXISTS firms.financials_ong (
    cui                 TEXT    NOT NULL,
    year                INTEGER NOT NULL,
    caen                TEXT,               -- economic activity code
    caeno               TEXT,               -- non-profit activity code
    -- Convenience columns lifted out of the raw indicators:
    capitaluri_proprii  NUMERIC(20,2),      -- i12
    venituri_total      NUMERIC(20,2),      -- i38
    cheltuieli_total    NUMERIC(20,2),      -- i40
    excedent            NUMERIC(20,2),      -- i42
    personal_neeconomic BIGINT,             -- i45
    personal_economic   BIGINT,             -- i46
    -- Raw indicators, kept for completeness:
    indicators          JSONB   NOT NULL,   -- {i1..i46}, all values
    source              TEXT        DEFAULT 'mfinante:WEB_ONG',
    fetched_at          TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (cui, year)
);
CREATE INDEX IF NOT EXISTS idx_fin_ong_year
    ON firms.financials_ong (year);
CREATE INDEX IF NOT EXISTS idx_fin_ong_caen
    ON firms.financials_ong (caen);
-- Staging table mirroring the NGO source file: 49 text columns
-- (CUI, CAEN, CAENO, then i1..i46). Column order is significant for loading.
CREATE TABLE IF NOT EXISTS firms.staging_ong (
    cui   TEXT,
    caen  TEXT,
    caeno TEXT,
    i1  TEXT, i2  TEXT, i3  TEXT, i4  TEXT, i5  TEXT,
    i6  TEXT, i7  TEXT, i8  TEXT, i9  TEXT, i10 TEXT,
    i11 TEXT, i12 TEXT, i13 TEXT, i14 TEXT, i15 TEXT,
    i16 TEXT, i17 TEXT, i18 TEXT, i19 TEXT, i20 TEXT,
    i21 TEXT, i22 TEXT, i23 TEXT, i24 TEXT, i25 TEXT,
    i26 TEXT, i27 TEXT, i28 TEXT, i29 TEXT, i30 TEXT,
    i31 TEXT, i32 TEXT, i33 TEXT, i34 TEXT, i35 TEXT,
    i36 TEXT, i37 TEXT, i38 TEXT, i39 TEXT, i40 TEXT,
    i41 TEXT, i42 TEXT, i43 TEXT, i44 TEXT, i45 TEXT,
    i46 TEXT
);
-- ─── Bănci / Instituții de Credit ─────────────────────────────────────────
-- Source: WEB_Inst_de_credit_20XX.txt on data.gov.ro situatii_financiare_<year>
-- Schema: CUI, CAEN, i1..i23 (IFRS bank-specific)
-- i6 = Active financiare evaluate la cost amortizat
-- i14 = Capital social
-- i17 = Profit / (-) pierdere aferent exercițiului
-- i19 = Profit / pierdere din operațiuni continue înainte de impozitare
-- i22 = Profit / pierdere aferent exercițiului
-- i23 = Cifra de afaceri netă
-- (Note: source CSV has a typo for i18 — column header "18" without "i"
-- prefix. Importer treats it consistently as i18.)
-- Yearly financial statements of banks / credit institutions: one row per
-- (cui, year). A few indicators are lifted into typed columns; the full set
-- is kept in `indicators`.
CREATE TABLE IF NOT EXISTS firms.financials_banks (
    cui                       TEXT    NOT NULL,
    year                      INTEGER NOT NULL,
    caen                      TEXT,
    -- Convenience columns lifted out of the raw indicators:
    active_financiare_amortiz NUMERIC(20,2),    -- i6
    capital_social            NUMERIC(20,2),    -- i14
    profit_exercitiu          NUMERIC(20,2),    -- i22
    profit_inainte_impozit    NUMERIC(20,2),    -- i19
    cifra_afaceri             NUMERIC(20,2),    -- i23
    -- Raw indicators, kept for completeness:
    indicators                JSONB   NOT NULL, -- {i1..i23}
    source                    TEXT        DEFAULT 'mfinante:WEB_Inst_de_credit',
    fetched_at                TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (cui, year)
);
CREATE INDEX IF NOT EXISTS idx_fin_banks_year
    ON firms.financials_banks (year);
-- Staging table for the credit-institution source file: CUI, CAEN, then
-- i1..i23, all as text. Column order is significant for loading.
CREATE TABLE IF NOT EXISTS firms.staging_banks (
    cui  TEXT,
    caen TEXT,
    i1  TEXT, i2  TEXT, i3  TEXT, i4  TEXT, i5  TEXT,
    i6  TEXT, i7  TEXT, i8  TEXT, i9  TEXT, i10 TEXT,
    i11 TEXT, i12 TEXT, i13 TEXT, i14 TEXT, i15 TEXT,
    i16 TEXT, i17 TEXT, i18 TEXT, i19 TEXT, i20 TEXT,
    i21 TEXT, i22 TEXT, i23 TEXT
);
@@ -0,0 +1,58 @@
-- 017_fonduri_afir.sql
-- AFIR (Agenția pentru Finanțarea Investițiilor Rurale) plăți FEGA + FEADR.
-- Source: https://www.afir.ro/rapoarte/beneficiari-de-fonduri-europene/date-deschise/
-- Format: XLSX bulk per year, ~560K rows/year, no direct CUI column.
-- Strategy: load all rows, then fuzzy-match name → cui in a separate batch job.
CREATE SCHEMA IF NOT EXISTS fonduri;

-- AFIR payments (FEGA + FEADR), one row per source row and year. The source
-- carries no CUI, so the cui* columns start empty and are filled in later by
-- a separate matching job.
CREATE TABLE IF NOT EXISTS fonduri.afir_plati (
    id               BIGSERIAL PRIMARY KEY,
    source_year      SMALLINT NOT NULL,
    beneficiar_name  TEXT     NOT NULL,
    last_name        TEXT,            -- empty for legal entities
    mama_cui         TEXT,            -- mother company name + CUI when applicable (mostly empty)
    localitate       TEXT,
    cod_masura       TEXT,
    obiectiv         TEXT,
    data_start       TEXT,
    data_end         TEXT,
    fega_op          NUMERIC(20,2),
    fega_total       NUMERIC(20,2),
    feadr_op         NUMERIC(20,2),
    feadr_total      NUMERIC(20,2),
    op_amount        NUMERIC(20,2),
    cofinantare      NUMERIC(20,2),
    ue_total         NUMERIC(20,2),
    -- Enrichment, filled by the separate matcher:
    cui              TEXT,
    cui_match_score  REAL,
    cui_match_method TEXT,
    matched_at       TIMESTAMPTZ,
    fetched_at       TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_afir_year
    ON fonduri.afir_plati (source_year);
-- Partial index: only rows that already have a matched CUI.
CREATE INDEX IF NOT EXISTS idx_afir_cui
    ON fonduri.afir_plati (cui)
    WHERE cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_afir_cod_masura
    ON fonduri.afir_plati (cod_masura);
CREATE INDEX IF NOT EXISTS idx_afir_localitate
    ON fonduri.afir_plati (localitate);
-- Trigram index backing fuzzy matching on the beneficiary name (pg_trgm).
CREATE INDEX IF NOT EXISTS idx_afir_name_trgm
    ON fonduri.afir_plati
    USING gin (beneficiar_name gin_trgm_ops);
-- COPY landing table for AFIR rows: no primary key, every column is text.
-- Column order is significant for loading.
CREATE TABLE IF NOT EXISTS fonduri.staging_afir (
    beneficiar_name TEXT,
    last_name       TEXT,
    mama_cui        TEXT,
    localitate      TEXT,
    cod_masura      TEXT,
    obiectiv        TEXT,
    data_start      TEXT,
    data_end        TEXT,
    fega_op         TEXT,
    fega_total      TEXT,
    feadr_op        TEXT,
    feadr_total     TEXT,
    op_amount       TEXT,
    cofinantare     TEXT,
    ue_total        TEXT
);
@@ -0,0 +1,102 @@
-- 018_fonduri_beneficiar_privat.sql
-- Achiziții făcute de beneficiarii PRIVAȚI ai fondurilor europene
-- (firme care au primit POIM/POR/PNRR/AFIR etc. și trebuie să facă achiziții
-- transparente conform Manualului Beneficiarului — dar NU intră în SEAP fiindcă
-- sunt privați, nu autorități publice).
--
-- Source: https://beneficiar.fonduri-ue.ro:8080/anunturi
-- Volume: ~48,650 anunțuri (and growing) cu loturi atașate.
-- Joinable to firms.entities prin name (no CUI in source — fuzzy match).
-- Joinable to fonduri.afir_plati prin name (overlap pe beneficiari AFIR).
-- Joinable to seap.announcements prin supplier_cui când suppliers selected pe
-- aceste proceduri devin furnizori la stat (cross-source signal).
-- Procurement announcements published by private beneficiaries of EU funds.
-- One row per announcement; lots live in fonduri.beneficiar_anunt_lot.
CREATE TABLE IF NOT EXISTS fonduri.beneficiar_anunt (
    id                     INTEGER PRIMARY KEY, -- numeric ID from the URL /anunturi/details/2/{ID}
    -- Project linkage
    smis_proiect_id        INTEGER,             -- id linkable with /proiecte/details/{type}/{id}
    smis_proiect_type      SMALLINT,            -- 1=SMIS, 2=MySMIS, 3=PNRR, etc. (URL prefix)
    smis_proiect_code      TEXT,                -- "319946" (for display)
    smis_proiect_name      TEXT,                -- "CONSTRUIRE CAMIN BATRANI..."
    -- Beneficiary (private)
    beneficiar_name        TEXT NOT NULL,
    beneficiar_program_tag TEXT,                -- "SMIS" | "MySMIS" | other
    beneficiar_adresa      TEXT,
    beneficiar_contact     TEXT,
    beneficiar_telefon     TEXT,
    beneficiar_regiune     TEXT,
    beneficiar_judet       TEXT,
    beneficiar_localitate  TEXT,
    -- Announcement status / details
    procedura_status       TEXT,                -- "în curs de ofertare" | "închisă"
    data_publicare         DATE,
    data_limita_oferta     DATE,
    ora_limita_oferta      TEXT,
    judet                  TEXT,                -- county of the announcement (may differ from beneficiar_judet)
    tip_contract           TEXT,                -- "Furnizare" | "Servicii" | "Lucrări"
    versiune_specificatii  TEXT,
    titlu                  TEXT,                -- announcement title (from the list page)
    -- CUI enrichment (fuzzy match against firms.entities, second pass)
    cui                    TEXT,
    cui_match_score        REAL,
    cui_match_method       TEXT,                -- 'exact_name' | 'trgm' | 'manual'
    matched_at             TIMESTAMPTZ,
    -- Source tracking
    fetched_at             TIMESTAMPTZ DEFAULT now(),
    raw_html_sha256        CHAR(64)             -- to detect when a re-fetch is needed
);
CREATE INDEX IF NOT EXISTS idx_ben_anunt_smis
    ON fonduri.beneficiar_anunt (smis_proiect_id);
CREATE INDEX IF NOT EXISTS idx_ben_anunt_judet
    ON fonduri.beneficiar_anunt (judet);
CREATE INDEX IF NOT EXISTS idx_ben_anunt_data
    ON fonduri.beneficiar_anunt (data_publicare DESC);
-- Partial index: only rows that already have a matched CUI.
CREATE INDEX IF NOT EXISTS idx_ben_anunt_cui
    ON fonduri.beneficiar_anunt (cui)
    WHERE cui IS NOT NULL;
-- Trigram index backing fuzzy matching on the beneficiary name (pg_trgm).
CREATE INDEX IF NOT EXISTS idx_ben_anunt_name_trgm
    ON fonduri.beneficiar_anunt
    USING gin (beneficiar_name gin_trgm_ops);
-- Lots attached to an announcement. Deleting the parent announcement removes
-- its lots (ON DELETE CASCADE); (anunt_id, lot_no) is unique.
CREATE TABLE IF NOT EXISTS fonduri.beneficiar_anunt_lot (
    id              BIGSERIAL PRIMARY KEY,
    anunt_id        INTEGER NOT NULL
                    REFERENCES fonduri.beneficiar_anunt (id) ON DELETE CASCADE,
    lot_no          INTEGER NOT NULL,   -- 1, 2, 3... (ordinal)
    lot_label       TEXT,               -- "1" or "Achiziție licențe" — heading text
    descriere_url   TEXT,               -- /desc-lot?d={lot_id}
    durata_contract TEXT,               -- e.g. "6 luni"
    buget_lei       NUMERIC(20,2),
    cpv_cod         TEXT,               -- when present
    spec_url        TEXT,
    fetched_at      TIMESTAMPTZ DEFAULT now(),
    UNIQUE (anunt_id, lot_no)
);
-- Databases created from an earlier version of this table lack lot_label;
-- add it there (no-op when the column already exists).
ALTER TABLE fonduri.beneficiar_anunt_lot
    ADD COLUMN IF NOT EXISTS lot_label TEXT;
CREATE INDEX IF NOT EXISTS idx_ben_lot_anunt
    ON fonduri.beneficiar_anunt_lot (anunt_id);
CREATE INDEX IF NOT EXISTS idx_ben_lot_buget
    ON fonduri.beneficiar_anunt_lot (buget_lei DESC NULLS LAST);
-- Optional sister table describing the EU project itself; announcements link
-- to it through smis_proiect_id. Placeholder schema for now: a separate
-- /proiecte scraper is expected to populate it later.
CREATE TABLE IF NOT EXISTS fonduri.beneficiar_proiect (
    id                    INTEGER PRIMARY KEY, -- /proiecte/details/{type}/{id} → id
    proiect_type          SMALLINT,            -- 1=SMIS, 2=MySMIS, etc.
    smis_code             TEXT,                -- "319946"
    titlu                 TEXT,
    beneficiar_name       TEXT,
    program_op            TEXT,                -- POIM/POR/POCU/PNRR/...
    axa_prioritara        TEXT,
    valoare_totala_lei    NUMERIC(20,2),
    valoare_eligibila_lei NUMERIC(20,2),
    contributie_ue_lei    NUMERIC(20,2),
    data_start            DATE,
    data_end              DATE,
    data_actualizare      DATE,
    judet                 TEXT,
    localitate            TEXT,
    cui                   TEXT,
    cui_match_score       REAL,
    fetched_at            TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_ben_proiect_smis
    ON fonduri.beneficiar_proiect (smis_code);
CREATE INDEX IF NOT EXISTS idx_ben_proiect_program
    ON fonduri.beneficiar_proiect (program_op);
-- Partial index: only rows that already have a matched CUI.
CREATE INDEX IF NOT EXISTS idx_ben_proiect_cui
    ON fonduri.beneficiar_proiect (cui)
    WHERE cui IS NOT NULL;
@@ -0,0 +1,66 @@
-- 019_cui_matcher.sql
-- Fuzzy CUI matcher for any external table that has a company name but no CUI
-- (fonduri.beneficiar_anunt, fonduri.afir_plati, future ANI shareholdings, etc.)
-- against firms.entities.
--
-- Strategy:
-- 1. Build a normalized form of each company name on both sides
-- (lowercase + unaccent + strip legal suffixes + collapse whitespace).
-- 2. Stage A (exact normalized match): expect ~40-50% hit rate when the
-- ONRC-canonical legal name was used in the source.
-- 3. Stage B (pg_trgm fuzzy): top candidate ≥ 0.85 AND uniquely best
-- (gap to second-best ≥ 0.10) → auto-accept.
-- 4. Stage C (judet disambiguation): when multiple candidates above
-- threshold, prefer firm whose adr_judet matches source's judet.
-- Idempotent. Re-runnable after each scrape.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE EXTENSION IF NOT EXISTS unaccent;
-- ── Normalization helper ────────────────────────────────────────────────
-- unaccent() is declared STABLE, and its one-argument form resolves the
-- "unaccent" dictionary through the caller's search_path, so it cannot be
-- used directly in an index or a stored generated column. The customary
-- workaround is an IMMUTABLE wrapper. For that promise to hold, the wrapper
-- calls the TWO-argument form with a schema-qualified dictionary: the result
-- then no longer depends on search_path (with an empty search_path, as used
-- during dump/restore, the one-argument call cannot find the dictionary).
-- NOTE(review): assumes the unaccent extension, and therefore its
-- dictionary, is installed in schema "public" — the same assumption the
-- schema-qualified function call already makes.
--
-- firms.normalize_company_name(input):
--   returns NULL for NULL input; otherwise the name lower-cased and
--   unaccented, without a leading "SC" / "S.C." prefix, without a trailing
--   legal-form suffix, with runs of . , - ( ) / \ replaced by one space,
--   whitespace collapsed and the ends trimmed.
CREATE OR REPLACE FUNCTION firms.normalize_company_name(input text)
RETURNS text
LANGUAGE plpgsql
IMMUTABLE
PARALLEL SAFE
AS $$
DECLARE
    s text;
BEGIN
    IF input IS NULL THEN RETURN NULL; END IF;
    -- Two-argument unaccent with an explicit dictionary: search_path independent.
    s := lower(public.unaccent('public.unaccent'::regdictionary, input));
    -- Strip leading legal-form prefixes
    s := regexp_replace(s, '^(s\.?c\.?|sc)\s+', '', 'i');
    -- Strip trailing legal-form suffixes (SRL, SRL-D, SA, PFA, II, IF, etc.)
    s := regexp_replace(s,
        '\s+(s\.?r\.?l\.?(\s*-?\s*d)?|s\.?a\.?|s\.?n\.?c\.?|s\.?c\.?s\.?|s\.?c\.?a\.?|p\.?f\.?a\.?|i\.?i\.?|i\.?f\.?)\s*\.?\s*$',
        '', 'i');
    -- Collapse internal punctuation/whitespace
    s := regexp_replace(s, '[\.,\-\(\)/\\]+', ' ', 'g');
    s := regexp_replace(s, '\s+', ' ', 'g');
    RETURN trim(s);
END;
$$;
-- Stored generated column on firms.entities holding the normalized name.
-- The database computes it from `name` via firms.normalize_company_name, so
-- it is never written by an UPDATE.
ALTER TABLE firms.entities
    ADD COLUMN IF NOT EXISTS name_normalized TEXT
    GENERATED ALWAYS AS (firms.normalize_company_name(name)) STORED;
-- B-tree index on the normalized name.
CREATE INDEX IF NOT EXISTS idx_entities_name_normalized
    ON firms.entities (name_normalized);
-- Trigram (pg_trgm) GIN index on the normalized name.
CREATE INDEX IF NOT EXISTS idx_entities_name_norm_trgm
    ON firms.entities
    USING gin (name_normalized gin_trgm_ops);
-- Optional: county (judet) normalization used to disambiguate matches.
-- firms.normalize_judet(input) returns the county name lower-cased and
-- unaccented; NULL input yields the empty string.
-- The function is declared IMMUTABLE, so it calls the two-argument form of
-- unaccent() with a schema-qualified dictionary: the one-argument form
-- resolves the dictionary through search_path and would make the result
-- depend on session settings.
-- NOTE(review): assumes the unaccent extension (and its dictionary) lives in
-- schema "public", as the schema-qualified function call already implies.
CREATE OR REPLACE FUNCTION firms.normalize_judet(input text)
RETURNS text
LANGUAGE sql
IMMUTABLE
PARALLEL SAFE
AS $$
    SELECT lower(public.unaccent('public.unaccent'::regdictionary, coalesce(input, '')))
$$;
@@ -0,0 +1,43 @@
-- 020_fonduri_proiect_v2.sql
-- Refactor fonduri.beneficiar_proiect to match what the source actually exposes.
-- Source: https://beneficiar.fonduri-ue.ro:8080/proiecte/details/1/{id}
--
-- The page exposes only 7 fields (Cod SMIS, Program operațional, Axa, Domeniul
-- de intervenție, Operațiune, Beneficiar, Data contract) — NOT valoare_totala/
-- valoare_eligibila/contributie_ue/data_start/data_end/judet/localitate that
-- the original aspirational schema (018) implied. Drop unused fields, add the
-- ones we can populate, split each "Program/Axa/Domeniul/Operațiune" into a
-- {cod, text} pair (first whitespace-separated token = code, rest = text).
--
-- The placeholder table from 018 has 0 rows → safe to drop + recreate.
-- Replace the placeholder table from 018 with the schema the source actually
-- exposes. The drop is guarded: it only happens while the table still has the
-- legacy layout (recognised by its `program_op` column, which the new layout
-- does not have). An unconditional DROP would wipe every scraped project
-- whenever this migration is executed again; with the guard a re-run is a
-- no-op once the new layout is in place.
DO $$
BEGIN
    IF EXISTS (
        SELECT 1
        FROM pg_attribute
        WHERE attrelid = to_regclass('fonduri.beneficiar_proiect')
          AND attname = 'program_op'
          AND NOT attisdropped
    ) THEN
        -- Legacy placeholder (expected to be empty): safe to drop and recreate.
        DROP TABLE fonduri.beneficiar_proiect;
    END IF;
END
$$;
CREATE TABLE IF NOT EXISTS fonduri.beneficiar_proiect (
    id               integer PRIMARY KEY,   -- /proiecte/details/{type}/{id} → id
    proiect_type     smallint NOT NULL,     -- 1=SMIS (only type seen so far)
    smis_code        text,                  -- "Cod SMIS" e.g. "313646"
    titlu            text,                  -- from <title> tag
    beneficiar_name  text,                  -- "REALMET SRL"
    program_op_cod   text,                  -- "PRNE" / "POIM" / "POR" / "PNRR" …
    program_op_text  text,                  -- "Program Regional Nord-Est"
    axa_cod          text,                  -- "PRNE_P1"
    axa_text         text,                  -- "P1.P1. Nord-Est O regiune mai competitivă…"
    domeniul_cod     text,                  -- "RSO1.3"
    domeniul_text    text,                  -- "RSO1.3_Intensificarea creșterii…"
    operatiune_cod   text,                  -- "PRNE_A18"
    operatiune_text  text,                  -- "Investiții pentru modernizarea…"
    data_contract    date,
    cui              text,                  -- fuzzy-matched later
    cui_match_score  real,
    cui_match_method text,
    matched_at       timestamptz,
    fetched_at       timestamptz DEFAULT now(),
    raw_html_sha256  char(64)
);
-- IF NOT EXISTS keeps the index statements re-runnable now that the table is
-- no longer dropped on every execution.
CREATE INDEX IF NOT EXISTS idx_ben_proiect_smis    ON fonduri.beneficiar_proiect(smis_code);
CREATE INDEX IF NOT EXISTS idx_ben_proiect_program ON fonduri.beneficiar_proiect(program_op_cod);
CREATE INDEX IF NOT EXISTS idx_ben_proiect_axa     ON fonduri.beneficiar_proiect(axa_cod);
CREATE INDEX IF NOT EXISTS idx_ben_proiect_cui     ON fonduri.beneficiar_proiect(cui) WHERE cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_ben_proiect_data    ON fonduri.beneficiar_proiect(data_contract DESC NULLS LAST);
@@ -0,0 +1,74 @@
-- 021_regas_ajutoare.sql
-- RegAS — Registrul Ajutoarelor de Stat (Consiliul Concurenței).
-- Source: https://regas.consiliulconcurentei.ro/transparenta/index.html
-- API: POST /apitransparenta/cautareTransparenta (XSRF-TOKEN cookie + header)
-- Volume at first scrape (2026-05-09): 132,363 ajutoare individuale.
--
-- Each row = one ajutor de stat acordat unei firme (denumireBeneficiar+cui),
-- cu masura (referintaMasura SA.xxx/yyyy), categorie/subcategorie, suma în RON,
-- finantator (ministerul/agentia care a dat banii), instrumentAcordare
-- (fonduri nerambursabile / credite / garantii / scutiri).
--
-- Cheie naturală: (cui, idMasura, dataAcordare, ajutorAcordatSubcategorie) —
-- același beneficiar poate primi multiple tranșe pe aceeași măsură. Folosim
-- un id sintetic SHA1 pentru ON CONFLICT idempotent.
CREATE SCHEMA IF NOT EXISTS regas;

-- State aid records, one row per aid granted to a beneficiary. The primary
-- key is a synthetic SHA-1 so that loads can upsert idempotently.
CREATE TABLE IF NOT EXISTS regas.ajutoare (
    id                          CHAR(40) PRIMARY KEY, -- sha1(cui|idMasura|dataAcordare|subcategorie|sumaSubcategorie)
    cui                         TEXT    NOT NULL,
    denumire_beneficiar         TEXT    NOT NULL,
    raf                         TEXT,                 -- "Registrul Ajutoarelor Fiscale" id (rare)
    alt_cod                     TEXT,                 -- alternative ID (rare)
    dimensiune_intreprindere    TEXT,                 -- "întreprindere mare" / "IMM" / etc.
    regiune                     TEXT[],               -- ["Regiunea I Nord-Est", ...] or ["Toate regiunile"]
    domeniu_activitate          TEXT,                 -- CAEN code (4 digits)
    data_acordare               DATE,                 -- parsed from "DD/MM/YYYY"
    id_masura                   INTEGER NOT NULL,     -- logical FK to /apitransparenta/downloadPdfMasura/{idMasura}
    denumire_masura             TEXT    NOT NULL,
    referinta_masura            TEXT,                 -- "SA.104966/2022"
    activitati_finantate        TEXT[],               -- CAEN codes
    categorie                   TEXT,
    subcategorie                TEXT,
    obiectiv                    TEXT,
    ajutor_acordat_categorie    NUMERIC(20,2),        -- RON
    ajutor_acordat_subcategorie NUMERIC(20,2),        -- RON (usually equals the category amount when there is a single subcategory)
    intermediari_financiari     TEXT[],
    executanti                  TEXT[],
    instrument_acordare         TEXT,                 -- "fonduri nerambursabile" / "garantii" / "credite"
    intensitate                 NUMERIC(6,2),         -- percentage (e.g. 65.00)
    finantator                  TEXT,                 -- "Ministerul ... (MIPE)"
    pdf_masura                  TEXT,                 -- "1195_MASURA.pdf"
    fetched_at                  TIMESTAMPTZ DEFAULT now(),
    raw_json                    JSONB
);
CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_cui
    ON regas.ajutoare (cui);
CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_data
    ON regas.ajutoare (data_acordare DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_finantator
    ON regas.ajutoare (finantator);
CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_referinta
    ON regas.ajutoare (referinta_masura);
CREATE INDEX IF NOT EXISTS idx_regas_ajutoare_id_masura
    ON regas.ajutoare (id_masura);
COMMENT ON TABLE regas.ajutoare IS
'State aid records from Consiliul Concurentei RegAS portal. One row per ajutor acordat. Source: https://regas.consiliulconcurentei.ro/transparenta/';
COMMENT ON COLUMN regas.ajutoare.id IS
'Synthetic sha1 of (cui|idMasura|dataAcordare|subcategorie|ajutorAcordatSubcategorie). Used for idempotent upsert.';
-- Per-CUI rollup of regas.ajutoare, precomputed for fast company-profile
-- lookups: counts, total amount, first/last grant date, and the distinct
-- funders and instruments (NULLs excluded from the arrays).
CREATE MATERIALIZED VIEW IF NOT EXISTS regas.mv_ajutoare_per_cui AS
SELECT
    cui,
    count(*)                            AS nr_ajutoare,
    sum(ajutor_acordat_subcategorie)    AS total_ron,
    count(DISTINCT id_masura)           AS nr_masuri,
    count(DISTINCT finantator)          AS nr_finantatori,
    min(data_acordare)                  AS prima_acordare,
    max(data_acordare)                  AS ultima_acordare,
    array_agg(DISTINCT finantator)
        FILTER (WHERE finantator IS NOT NULL)          AS finantatori,
    array_agg(DISTINCT instrument_acordare)
        FILTER (WHERE instrument_acordare IS NOT NULL) AS instrumente
FROM regas.ajutoare
GROUP BY cui;
-- One row per cui, enforced by a unique index.
CREATE UNIQUE INDEX IF NOT EXISTS idx_regas_mv_cui
    ON regas.mv_ajutoare_per_cui (cui);
@@ -0,0 +1,203 @@
-- 024_aep_donatii.sql
-- AEP — Autoritatea Electorală Permanentă — donații electorale & finanțare partide.
--
-- Sursă oficială (mandatată prin Legea 334/2006): rapoartele anuale + rapoartele
-- de venituri și cheltuieli (RVC) ale partidelor + listele de donatori publicate
-- în Monitorul Oficial pentru donații > 10 salarii minime brute.
--
-- Vehicul de ingest: portalul Expert Forum (banipartide.ro) care a agregat-o
-- deja în SQLite și o expune via endpoint base64-SQL la
-- https://www.banipartide.ro/app/json.php?mode=dt&ssid=<base64>.
-- (Sursele primare AEP sunt PDF/Excel + reCAPTCHA, deci EFOR este path de
-- minim efort. Validate against AEP RVC PDFs as v2.)
--
-- Volume @ 2026-05-09:
-- Donatori persoane juridice (>10 sal MO): 3,612 (2006-2024)
-- Donatori persoane fizice (>10 sal MO): 30,792 (2006-2024)
-- Donatori RVC (rapoarte venituri/cheltuieli, granular complet): 353,473
--
-- GDPR: CNP-urile sunt expuse în clear pe banipartide.ro (publicate în MO conf.
-- legii). Le hash-ăm SHA256 pe ingest — la noi NU stocăm CNP raw. Numele
-- complet e public prin lege și rămâne. Adresa pe firme (PJ) e public,
-- la persoane fizice (PF) NU avem adresă la sursă.
--
-- Cross-source value: aep.donatii_pj.donator_cui ↔ seap.announcements.supplier_cui
-- = "donator X a donat Y RON partidului Z, apoi a câștigat W RON contracte SEAP".
CREATE SCHEMA IF NOT EXISTS aep;
COMMENT ON SCHEMA aep IS
'Autoritatea Electorală Permanentă — donații, finanțare partide, RVC. Sursă: banipartide.ro (EFOR) → AEP/MO.';

-- ──────────────────────────────────────────────────────────────────────────
-- aep.partide — normalized party registry (codes come from the banipartide
-- source). Referenced by the partid_id column of the donation tables.
-- ──────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS aep.partide (
    id           TEXT PRIMARY KEY,  -- 'PSD', 'PNL', 'USR', 'AUR', 'UDMR', etc.
    nume_oficial TEXT,              -- 'Partidul Social Democrat'
    fondat       DATE,
    sediu_cui    TEXT,              -- the party's CIF, when known
    status       TEXT,              -- 'activ' | 'dizolvat' | 'fuzionat'
    fetched_at   TIMESTAMPTZ DEFAULT now()
);
COMMENT ON TABLE aep.partide IS
'Registru partide politice (cheie naturală = abreviere normalizată din sursa banipartide).';
-- ──────────────────────────────────────────────────────────────────────────
-- aep.donatii_pj — donations from legal entities (above 10 minimum gross
-- salaries, published in Monitorul Oficial). source_hash is UNIQUE so the
-- scraper can upsert idempotently.
-- ──────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS aep.donatii_pj (
    id                BIGSERIAL PRIMARY KEY,
    source_hash       CHAR(40) NOT NULL UNIQUE, -- sha1(nume|cui|partid|an|suma|data_donatie) for idempotent upsert
    donator_nume      TEXT NOT NULL,
    donator_cui       TEXT,                     -- normalized CUI (numerals only, RO prefix stripped)
    donator_cui_raw   TEXT,                     -- original form (may contain typos / "RO")
    reprezentant      TEXT,
    sediu             TEXT,
    nationalitate     TEXT,                     -- "română" / etc.
    partid_id         TEXT REFERENCES aep.partide (id) ON UPDATE CASCADE,
    filiala_partid    TEXT,
    suma_lei          NUMERIC(14,2) NOT NULL,
    an                SMALLINT      NOT NULL,
    data_donatie_text TEXT,                     -- mixed formats in the source: "11.10.2019; 13.11.2019" or "10042010" — kept raw
    data_donatie      DATE,                     -- best-effort parse (NULL when the format is incompatible or holds several dates)
    tip_donatie       TEXT,                     -- "Bani" / "Natură" / etc.
    felul_donatie     TEXT,                     -- "Bani" / "Ordin De Plată" / "Spațiu Publicitar"
    source_url        TEXT NOT NULL DEFAULT 'https://www.banipartide.ro/donatori-persoane-juridice.html',
    fetched_at        TIMESTAMPTZ DEFAULT now()
);
-- Partial index: only rows with a known donor CUI.
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pj_cui
    ON aep.donatii_pj (donator_cui)
    WHERE donator_cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pj_partid
    ON aep.donatii_pj (partid_id);
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pj_an
    ON aep.donatii_pj (an);
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pj_suma
    ON aep.donatii_pj (suma_lei DESC);
COMMENT ON TABLE aep.donatii_pj IS
'Donații de la persoane juridice către partide, peste pragul de 10 salarii minime brute (publicate în MO). Sursă: banipartide.ro → AEP. Granularitate: o linie per (donator, partid, an, sumă, dată).';
COMMENT ON COLUMN aep.donatii_pj.source_hash IS
'sha1(nume_lower|cui|partid|an|suma|data_text). Garantează idempotenta scraperului.';
-- ──────────────────────────────────────────────────────────────────────────
-- aep.donatii_pf — donations from natural persons (above 10 minimum
-- salaries, published in Monitorul Oficial).
-- The CNP is stored hashed only (NEVER raw in the database).
-- ──────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS aep.donatii_pf (
    id                 BIGSERIAL PRIMARY KEY,
    source_hash        CHAR(40) NOT NULL UNIQUE, -- sha1(nume|cnp_hash|partid|an|suma|data)
    donator_nume       TEXT NOT NULL,
    donator_cnp_sha256 CHAR(64),                 -- SHA-256 hex of CNP (only if CNP was non-empty in source)
    partid_id          TEXT REFERENCES aep.partide (id) ON UPDATE CASCADE,
    organizatia        TEXT,                     -- party branch / organization
    suma_lei           NUMERIC(14,2) NOT NULL,
    an                 SMALLINT      NOT NULL,
    data_donatie_text  TEXT,
    data_donatie       DATE,
    tip_donatie        TEXT,                     -- "Donație" / "Cotizație" / "Împrumut"
    ce_s_a_donat       TEXT,                     -- "Bani" / "Bunuri" / etc.
    source_url         TEXT NOT NULL DEFAULT 'https://www.banipartide.ro/donatori-persoane-fizice.html',
    fetched_at         TIMESTAMPTZ DEFAULT now()
);
-- Partial index: only rows that carry a CNP hash.
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pf_cnp_hash
    ON aep.donatii_pf (donator_cnp_sha256)
    WHERE donator_cnp_sha256 IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pf_nume
    ON aep.donatii_pf (donator_nume);
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pf_partid
    ON aep.donatii_pf (partid_id);
CREATE INDEX IF NOT EXISTS idx_aep_donatii_pf_an
    ON aep.donatii_pf (an);
COMMENT ON TABLE aep.donatii_pf IS
'Donații de la persoane fizice către partide, peste pragul de 10 salarii minime (publicate în MO). CNP-urile sunt SHA-256 hashed la ingest. Sursă: banipartide.ro.';
COMMENT ON COLUMN aep.donatii_pf.donator_cnp_sha256 IS
'SHA-256 hex digest al CNP. Permite re-identificare dacă cineva are CNP-ul, dar nu dezvăluie CNP-ul. NU e key-uit cu salt — scopul e doar de-duplicare cross-an, nu protecție criptografică împotriva brute-force pe spațiul CNP-urilor românești.';
-- ──────────────────────────────────────────────────────────────────────────
-- aep.donatii_rvc — every donor listed in the parties' income and expense
-- reports (RVC): donations + membership fees + loans, without the
-- 10-salary threshold.
-- ──────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS aep.donatii_rvc (
    id                BIGSERIAL PRIMARY KEY,
    source_hash       CHAR(40) NOT NULL UNIQUE,
    donator_nume      TEXT NOT NULL,
    judet             TEXT,                 -- "Alba", "București"
    cod_judet         TEXT,                 -- "AB", "B"
    tip_venit         TEXT,                 -- "Cotizație" | "Donație" | "Împrumut"
    partid_id         TEXT REFERENCES aep.partide (id) ON UPDATE CASCADE,
    suma_lei          NUMERIC(14,2) NOT NULL,
    mod_incasare      TEXT,                 -- "Banca" | "Numerar" | etc.
    an                SMALLINT      NOT NULL,
    data_donatie_text TEXT,
    data_donatie      DATE,
    source_url        TEXT NOT NULL DEFAULT 'https://www.banipartide.ro/donatori-rvc.html',
    fetched_at        TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_aep_donatii_rvc_partid
    ON aep.donatii_rvc (partid_id);
CREATE INDEX IF NOT EXISTS idx_aep_donatii_rvc_an
    ON aep.donatii_rvc (an);
CREATE INDEX IF NOT EXISTS idx_aep_donatii_rvc_nume
    ON aep.donatii_rvc (donator_nume);
CREATE INDEX IF NOT EXISTS idx_aep_donatii_rvc_judet
    ON aep.donatii_rvc (judet);
COMMENT ON TABLE aep.donatii_rvc IS
'Toate donațiile/cotizațiile/împrumuturile din rapoartele de venituri și cheltuieli (RVC) ale partidelor, fără pragul de 10 salarii. ~353K rânduri. Sursă: banipartide.ro → AEP.';
-- ──────────────────────────────────────────────────────────────────────────
-- aep.scrape_log — audit trail of scraper runs (per table × per day).
-- ──────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS aep.scrape_log (
    id            BIGSERIAL PRIMARY KEY,
    scraper       TEXT        NOT NULL, -- 'donatii_pj' | 'donatii_pf' | 'donatii_rvc'
    source_url    TEXT        NOT NULL,
    rows_seen     INTEGER     NOT NULL,
    rows_inserted INTEGER     NOT NULL,
    rows_updated  INTEGER     NOT NULL,
    rows_skipped  INTEGER     NOT NULL,
    duration_ms   INTEGER     NOT NULL,
    started_at    TIMESTAMPTZ NOT NULL,
    finished_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    error         TEXT
);
-- Composite index: runs of one scraper, most recently started first.
CREATE INDEX IF NOT EXISTS idx_aep_scrape_log_scraper
    ON aep.scrape_log (scraper, started_at DESC);
-- ──────────────────────────────────────────────────────────────────────────
-- Materialized view: per-CUI rollup of legal-entity donations, for fast
-- company profiles. Rows without a donor CUI are excluded.
-- (Refreshed by cron after each scrape; see refresh-mvs.sh.)
-- ──────────────────────────────────────────────────────────────────────────
CREATE MATERIALIZED VIEW IF NOT EXISTS aep.mv_donatii_per_cui AS
SELECT
    donator_cui                 AS cui,
    count(*)                    AS nr_donatii,
    sum(suma_lei)               AS total_lei,
    count(DISTINCT partid_id)   AS nr_partide,
    array_agg(DISTINCT partid_id)
        FILTER (WHERE partid_id IS NOT NULL) AS partide,
    min(an)                     AS prima_donatie_an,
    max(an)                     AS ultima_donatie_an
FROM aep.donatii_pj
WHERE donator_cui IS NOT NULL
GROUP BY donator_cui;
-- One row per cui, enforced by a unique index.
CREATE UNIQUE INDEX IF NOT EXISTS idx_aep_mv_donatii_per_cui
    ON aep.mv_donatii_per_cui (cui);
COMMENT ON MATERIALIZED VIEW aep.mv_donatii_per_cui IS
'Pre-aggregat pentru profile firmă: donații totale per CUI. Refresh după fiecare scrape.';
-- ──────────────────────────────────────────────────────────────────────────
-- Materialized view: donors aggregated per party (used on public pages).
-- One row per (partid_id, donator_nume, donator_cui); donations without a
-- party are excluded.
-- ──────────────────────────────────────────────────────────────────────────
CREATE MATERIALIZED VIEW IF NOT EXISTS aep.mv_top_donatori_partid AS
SELECT
    partid_id,
    donator_nume,
    donator_cui,
    count(*)      AS nr_donatii,
    sum(suma_lei) AS total_lei,
    min(an)       AS prima_donatie_an,
    max(an)       AS ultima_donatie_an
FROM aep.donatii_pj
WHERE partid_id IS NOT NULL
GROUP BY
    partid_id,
    donator_nume,
    donator_cui;
-- Composite index: donors of one party, largest total first.
CREATE INDEX IF NOT EXISTS idx_aep_mv_top_donatori_partid_partid
    ON aep.mv_top_donatori_partid (partid_id, total_lei DESC);
-- Partial index: only rows with a known donor CUI.
CREATE INDEX IF NOT EXISTS idx_aep_mv_top_donatori_partid_cui
    ON aep.mv_top_donatori_partid (donator_cui)
    WHERE donator_cui IS NOT NULL;
COMMENT ON MATERIALIZED VIEW aep.mv_top_donatori_partid IS
'Top donatori per partid pentru afișare publică. Datele sunt deja publice prin lege (MO).';
@@ -0,0 +1,96 @@
-- 025_anaf_datornici.sql
-- ANAF — Lista contribuabililor cu obligații fiscale restante (datornici).
-- Source: https://www.anaf.ro/restante/ (publicare trimestrială, Ord. 558/2016).
-- Plus lista albă (contribuabili FĂRĂ datorii) la /restante/listaalba.xhtml.
--
-- Bazele legale: ANAF publică trimestrial sumele restante peste plafoane —
-- 500.000 lei (mari contribuabili), 250.000 lei (mijlocii), 100.000 lei
-- (mici), 10.000 lei (instituții publice). Sub plafon nu se publică.
--
-- KILLER USE CASE: cross-reference cu seap.announcements pentru a găsi
-- "firme datornice care au câștigat contracte publice" — interzis prin
-- art. 165 Legea 98/2016 dacă sunt obligații fiscale executorii.
--
-- IMPORTANT — limitări surse de date (2026-05-09):
-- 1. anaf.ro/restante/index.xhtml e o aplicație JSF/PrimeFaces cu CAPTCHA
-- de tip kaptcha pe submit. Nu e bulk-scrapeable fără OCR/captcha-solver
-- pentru cele ~5K-15K rânduri per trimestru (×4 trim × ~10 ani = ~500K).
-- 2. data.gov.ro publică UN SINGUR snapshot Q1-2016 (mari/mijlocii/micijuridice
-- CSV) — 140,780 rânduri, util ca baseline istoric.
-- 3. listafirme.eu agregă ANAF datornici în spatele unui paywall API.
--
-- Strategia ingest:
-- - Faza 1 (THIS): schema + importer CSV pentru data.gov.ro Q1-2016 snapshot.
-- ~140K rânduri reale, validează schema end-to-end.
-- - Faza 2 (TODO): scraper cu captcha-solver extern (anti-captcha.com /
-- 2captcha) pentru anaf.ro/restante/ live + arhive trimestriale dacă găsim.
-- - Faza 3: integrare cu firms.entities pentru profile badges + recipe-uri.
CREATE SCHEMA IF NOT EXISTS anaf;

-- ── Main table: debtors per (CUI × publication date) ────────────────────────
CREATE TABLE IF NOT EXISTS anaf.datornici (
    cui              TEXT NOT NULL,  -- without the RO prefix
    name             TEXT,           -- taxpayer name
    judet            TEXT,           -- 2026: not available in the data.gov.ro Q1-2016 CSVs, but exposed in the live XHTML
    publication_date DATE NOT NULL,  -- first day of the quarter (2016-01-01 = T1 2016)
    period_label     TEXT NOT NULL,  -- 'T1 2016' / 'T2 2024' etc.
    debtor_category  TEXT,           -- 'mari' | 'mijlocii' | 'mici' | 'institutii_publice' | 'persoane_fizice'
    debt_total       NUMERIC(20,2),  -- RON amount (principal + accessories across all 4 budgets)
    debt_principal   NUMERIC(20,2),  -- RON amount (principal across all 4 budgets)
    debt_penalty     NUMERIC(20,2),  -- RON amount (accessories across all 4 budgets)
    debt_contested   NUMERIC(20,2),  -- contested RON amount (uncontested = total - contested)
    -- Per-budget detail, kept for forensics even though the aggregated
    -- total/principal/penalty is enough for most recipes:
    budget_state_principal        NUMERIC(20,2),
    budget_state_penalty          NUMERIC(20,2),
    budget_state_contested        NUMERIC(20,2),
    budget_social_principal       NUMERIC(20,2),
    budget_social_penalty         NUMERIC(20,2),
    budget_social_contested       NUMERIC(20,2),
    budget_unemployment_principal NUMERIC(20,2),
    budget_unemployment_penalty   NUMERIC(20,2),
    budget_unemployment_contested NUMERIC(20,2),
    budget_health_principal       NUMERIC(20,2),
    budget_health_penalty         NUMERIC(20,2),
    budget_health_contested       NUMERIC(20,2),
    source_url       TEXT,           -- original URL of the CSV / XHTML
    fetched_at       TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (cui, publication_date)
);
CREATE INDEX IF NOT EXISTS idx_anaf_datornici_cui
    ON anaf.datornici (cui);
CREATE INDEX IF NOT EXISTS idx_anaf_datornici_pub_date
    ON anaf.datornici (publication_date DESC);
CREATE INDEX IF NOT EXISTS idx_anaf_datornici_total
    ON anaf.datornici (debt_total DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_anaf_datornici_category
    ON anaf.datornici (debtor_category);
-- ── White list: companies WITHOUT outstanding obligations (SEAP-eligible) ───
-- Published separately at /restante/listaalba.xhtml. Less actionable, but
-- useful as a negative confirmation that "company X had NO debts when it won
-- contract Y": being absent from anaf.datornici does not necessarily mean
-- there were no debts, since they may be below the publication threshold.
CREATE TABLE IF NOT EXISTS anaf.lista_alba (
    cui              TEXT NOT NULL,
    name             TEXT,
    publication_date DATE NOT NULL,
    period_label     TEXT NOT NULL,
    source_url       TEXT,
    fetched_at       TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (cui, publication_date)
);
CREATE INDEX IF NOT EXISTS idx_anaf_lista_alba_cui
    ON anaf.lista_alba (cui);
CREATE INDEX IF NOT EXISTS idx_anaf_lista_alba_pub_date
    ON anaf.lista_alba (publication_date DESC);
-- ── View: the most recent publication per CUI (latest debt status) ──────────
-- DISTINCT ON keeps, for each cui, the row with the newest publication_date.
CREATE OR REPLACE VIEW anaf.datornici_latest AS
SELECT DISTINCT ON (cui)
    cui,
    name,
    judet,
    publication_date,
    period_label,
    debtor_category,
    debt_total,
    debt_principal,
    debt_penalty,
    debt_contested
FROM anaf.datornici
ORDER BY
    cui,
    publication_date DESC;

-- Catalog descriptions for the anaf schema objects.
COMMENT ON SCHEMA anaf IS 'ANAF (Agenția Națională de Administrare Fiscală) public registries';
COMMENT ON TABLE anaf.datornici IS 'Lista contribuabililor cu obligații restante, publicată trimestrial (Ord. 558/2016)';
COMMENT ON TABLE anaf.lista_alba IS 'Lista albă: contribuabili FĂRĂ obligații restante la data publicării';
COMMENT ON VIEW anaf.datornici_latest IS 'Cel mai recent snapshot al datoriilor per CUI';
+224
View File
@@ -0,0 +1,224 @@
-- 026_bugetar.sql
-- Transparență Bugetară MFP — execuții bugetare ale entităților publice.
-- Source primar: https://mfinante.gov.ro/apps/transparenta-bugetara/index.htm
-- → redirecționează spre aplicația activă: extranet.anaf.mfinante.gov.ro/anaf/extranet/EXECUTIEBUGETARA
--
-- KILLER USE CASE: cross-reference cu seap.announcements pentru a calcula
-- "ponderea unui furnizor SEAP în cheltuielile totale ale unei UAT" — adică
-- "Comuna X a cheltuit 80% din buget cu 1 furnizor". Plus "Capitole bugetare
-- consumate disproporționat de 1 firmă" (cap 51 admin pub × top vendor).
--
-- Volum estimat: ~13.700 entități × 12 luni × 5 ani × ~30 linii/raport
-- ≈ 25M rânduri pentru detaliat (FXB-EXB-900). Pentru raport
-- COFOG3 agregat (FXB-EXB-901), ~822K rânduri pentru perioada
-- 2021-2025 la nivel ordonator principal.
--
-- ─── LIMITĂRI SURSE DE DATE (2026-05-09) ────────────────────────────────────
-- 1. Aplicația oficială (extranet.anaf.mfinante.gov.ro/EXECUTIEBUGETARA) e
-- IBM WebSphere Portal cu CAPTCHA imagine pe fiecare căutare. Endpoint-ul
-- de căutare e POST cu un URL stateful + `seccode`. Nu există URL deep
-- direct per (CUI, perioadă) fără sesiune + captcha solver.
-- 2. Există un endpoint de autocomplete EXPUS fără captcha care întoarce TOATE
-- denumirile entităților publice per (sector_bugetar, județ):
-- POST /Rapoarte_Forexe/.../res/id=populateEpAJAX/.../
-- data: idSector=02&idJudet=CJ
-- response: ["BIBLIOTECA JUDETEANA OCTAVIAN GOGA CLUJ", ...] (JSON array).
-- Util pentru a construi universul ~13.7K entități, dar NU întoarce CUI-urile.
-- 3. Fișiere XML/XLSX detaliate (FXB-EXB-900) se descarcă DOAR dintr-un raport
-- de rezultate randat după captcha. Hash-uri de URL sunt valide ~minute.
-- 4. data.gov.ro publică doar agregate naționale (BGC = Bugetul General
-- Consolidat) ca XLS lunar — NU per-CUI.
-- 5. Multe primării publică propriile execuții pe site-urile lor (PDF/XLSX),
-- dar formatele variază — Plan B pentru top-N municipii.
--
-- ─── STRATEGIA INGEST (faze) ────────────────────────────────────────────────
-- Faza 1 (THIS migration): schema completă pregătită pentru parser FXB-EXB-900
-- + tabelă auxiliară bugetar.entitate cu universul EP din autocomplete API
-- (5 sectoare × 42 județe = 210 interogări → ~30K seed-uri brute care, după deduplicare, dau ~13.7K entități).
-- + descrierea formatelor XML/XLSX (din PDF-urile MFP "Structura fisier XML
-- raport FXB-900/901/905") așa încât parserul să fie deterministic.
-- Faza 2 (TODO ~80h): integrare captcha solver (2captcha/anti-captcha) +
-- crawler asincron care urmează (sector × județ × tipRaport × an × lună).
-- Faza 3: cross-link cu firms.entities + seap.announcements pentru recipe-uri
-- "buget vs procurement".
CREATE SCHEMA IF NOT EXISTS bugetar;

-- ────────────────────────────────────────────────────────────────────────────
-- Main table: budget execution lines per (entity × period × classification)
-- ────────────────────────────────────────────────────────────────────────────
-- Layout follows FXB-EXB-900 (detailed report per public entity) plus the
-- aggregates FXB-EXB-901 (principal authorizing officer) and FXB-EXB-905
-- (secondary authorizing officer). The classification columns follow the ROMC
-- structure (Romanian budget classification):
-- Capitol → Subcapitol → Paragraf → Articol → Aliniat.
CREATE TABLE IF NOT EXISTS bugetar.executie (
    id                       bigserial PRIMARY KEY,

    -- Entity identification
    cui                      text NOT NULL,      -- public entity CUI (without the RO prefix)
    cui_ordonator            text,               -- CUI of the principal authorizing officer (may differ from cui)
    entity_name              text,               -- name as of the reporting moment
    sector_bugetar           text,               -- '01' BS, '02' BL, '03' BASS, '04' SOMAJ, '05' FNUASS
    judet                    text,               -- 2-letter county code (AB, CJ, B, ...)

    -- Reporting period
    period                   text NOT NULL,      -- 'YYYY-MM' (cumulative from 1 Jan to the end of that month)
    period_year              smallint NOT NULL,
    period_month             smallint NOT NULL,  -- 1..12

    -- Source report type
    raport_tip               text NOT NULL,      -- 'FXB-EXB-900' | 'FXB-EXB-901' | 'FXB-EXB-905' | 'FXB-RBG-003' | 'FXB-EXB-902'
    raport_nivel             text,               -- 'entitate' | 'ordonator_principal' | 'ordonator_secundar'

    -- Budget classification (5 levels, per ROMC)
    side                     text NOT NULL,      -- 'venituri' | 'cheltuieli'
    capitol                  text,               -- 4 digits, e.g. '5101' = public authorities
    subcapitol               text,               -- 6 digits, e.g. '510102'
    paragraf                 text,               -- 8 digits
    articol                  text,               -- 10 digits, e.g. '5101010101'
    aliniat                  text,               -- 12 digits (rarely used)
    classification_label     text,               -- human-readable name
    cofog3                   text,               -- COFOG3 code (Classification of Functions of Government, aggregated)

    -- Key amounts (all in RON, cumulative from 1 Jan).
    -- Meaning per FXB-EXB-900:
    --   credite_bug_aprobate_ini = budget initially approved for the current year
    --   credite_bug_aprobate_def = final approved budget (after rectifications) at period end
    --   credite_bug_trimestru    = cumulative quarterly budget credits
    --   angajamente_bugetare     = committed amounts (FXB-EXB-902)
    --   angajamente_legale       = amounts committed through firm contracts
    --   plati_efectuate          = actual payments at period end (= "cumulative execution")
    --   incasari_realizate       = for side='venituri', the amounts collected
    credite_bug_aprobate_ini numeric(20,2),
    credite_bug_aprobate_def numeric(20,2),
    credite_bug_trimestru    numeric(20,2),
    angajamente_bugetare     numeric(20,2),
    angajamente_legale       numeric(20,2),
    plati_efectuate          numeric(20,2),
    incasari_realizate       numeric(20,2),

    -- "Primary" amount for simple queries: plati_efectuate when side='cheltuieli',
    -- incasari_realizate when side='venituri'. Computed by the writer at INSERT.
    suma_executat            numeric(20,2),

    -- Source metadata
    source_url               text,               -- original URL of the XML/XLSX file
    source_hash              text,               -- sha256(URL + filename), for dedup
    fetched_at               timestamptz NOT NULL DEFAULT now(),

    -- Uniqueness: one row per (entity, period, report type, side, classification,
    -- sector). The source URL/hash is NOT part of the key.
    -- PostgreSQL treats NULLs as distinct inside a UNIQUE constraint, so this
    -- constraint alone does not reject duplicates whenever any classification
    -- level (e.g. aliniat) or sector_bugetar is NULL. It is kept unchanged for
    -- writers that reference it by name or by column list.
    CONSTRAINT uq_bugetar_executie_full UNIQUE
        (cui, period, raport_tip, side, capitol, subcapitol, paragraf, articol, aliniat, sector_bugetar)
);

-- NULL-safe companion of uq_bugetar_executie_full: nullable key columns are
-- folded to '' so two rows that differ only by "NULL vs NULL" collide.
-- Upserts that must deduplicate such rows should target these same expressions
-- in ON CONFLICT. Creating this index fails if duplicates already exist in the
-- table; deduplicate first in that case.
CREATE UNIQUE INDEX IF NOT EXISTS uq_bugetar_executie_full_nullsafe
    ON bugetar.executie (
        cui,
        period,
        raport_tip,
        side,
        (COALESCE(capitol, '')),
        (COALESCE(subcapitol, '')),
        (COALESCE(paragraf, '')),
        (COALESCE(articol, '')),
        (COALESCE(aliniat, '')),
        (COALESCE(sector_bugetar, ''))
    );
-- Secondary indexes on bugetar.executie.
-- Per-entity history, newest year first.
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_cui_year
    ON bugetar.executie (cui, period_year DESC);

-- Slicing by reporting period.
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_period
    ON bugetar.executie (period_year, period_month);

-- Partial index: only rows that carry a capitol code.
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_capitol
    ON bugetar.executie (capitol)
    WHERE capitol IS NOT NULL;

-- County × budget sector filters.
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_judet_sector
    ON bugetar.executie (judet, sector_bugetar);

-- Ordering by executed amount within a side; rows without an amount sort last.
CREATE INDEX IF NOT EXISTS idx_bugetar_executie_side_amount
    ON bugetar.executie (side, suma_executat DESC NULLS LAST);

COMMENT ON TABLE bugetar.executie
    IS 'Linii de execuție bugetară (FXB-EXB-900/901/905). Un rând per (entitate, perioadă, clasificație, side).';

COMMENT ON COLUMN bugetar.executie.suma_executat
    IS 'Suma "principală" pentru queries: plati_efectuate la cheltuieli, incasari_realizate la venituri.';
-- ────────────────────────────────────────────────────────────────────────────
-- Universe of reporting public entities (discovered via the autocomplete API)
-- ────────────────────────────────────────────────────────────────────────────
-- Step 1 of the ingest: enumerate (sector_bugetar × județ) and download the
-- list of public entity names. Then fuzzy-match against firms.entities to
-- attach a CUI. This unblocks the Phase 2 crawl (once a captcha solver exists).
CREATE TABLE IF NOT EXISTS bugetar.entitate (
    id                     bigserial PRIMARY KEY,
    entity_name            text NOT NULL,          -- raw name from MFP (case-sensitive)
    sector_bugetar         text NOT NULL,          -- '01' .. '05'
    judet                  text NOT NULL,          -- 2-letter county code
    cui                    text,                   -- attached post-hoc through fuzzy matching
    -- 0..1 matching confidence.
    -- NOTE(review): numeric(5,2) keeps only two decimals for a 0..1 score — confirm that is enough.
    cui_match_score        numeric(5,2),
    cui_match_method       text,                   -- 'exact' | 'fuzzy_anaf' | 'manual'
    is_ordonator_principal boolean DEFAULT false,  -- true when listed by populateOcpAJAX
    raport_count           integer DEFAULT 0,      -- number of report-periods downloaded successfully
    last_fetched_period    text,                   -- 'YYYY-MM' of the last ingested period
    fetched_at             timestamptz NOT NULL DEFAULT now(),
    updated_at             timestamptz NOT NULL DEFAULT now(),
    -- The same name may legitimately appear in several sectors or counties.
    UNIQUE (entity_name, sector_bugetar, judet)
);

-- Partial index: only entities that already have a CUI attached.
CREATE INDEX IF NOT EXISTS idx_bugetar_entitate_cui
    ON bugetar.entitate (cui)
    WHERE cui IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_bugetar_entitate_judet
    ON bugetar.entitate (judet, sector_bugetar);

-- Trigram index for fuzzy name matching.
CREATE INDEX IF NOT EXISTS idx_bugetar_entitate_name_trgm
    ON bugetar.entitate
    USING gin (entity_name gin_trgm_ops);

COMMENT ON TABLE bugetar.entitate
    IS 'Universul entităților publice raportoare descoperit din autocomplete API MFP. CUI-ul se atașează post-hoc prin fuzzy match cu firms.entities.';
-- ────────────────────────────────────────────────────────────────────────────
-- Job tracking — lets an interrupted crawl resume
-- ────────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS bugetar.crawl_job (
    id            bigserial PRIMARY KEY,
    cui           text,
    entity_name   text,
    period        text NOT NULL,                     -- 'YYYY-MM'
    raport_tip    text NOT NULL,
    status        text NOT NULL DEFAULT 'pending',   -- 'pending' | 'fetched' | 'parsed' | 'failed' | 'no_data'
    attempts      smallint NOT NULL DEFAULT 0,
    last_error    text,
    rows_inserted integer,
    fetched_at    timestamptz,
    parsed_at     timestamptz,
    updated_at    timestamptz NOT NULL DEFAULT now(),
    -- NOTE(review): cui is nullable and NULLs are distinct in a UNIQUE
    -- constraint, so jobs queued without a CUI are never deduplicated by
    -- this key — confirm whether such jobs are expected.
    UNIQUE (cui, period, raport_tip)
);

-- Work-queue lookups by status, then period.
CREATE INDEX IF NOT EXISTS idx_bugetar_crawl_status
    ON bugetar.crawl_job (status, period);
-- ────────────────────────────────────────────────────────────────────────────
-- Materialized views for a fast dashboard
-- ────────────────────────────────────────────────────────────────────────────
-- Summary per (CUI × year): total revenue, total expenditure, months reported.
--
-- bugetar.executie amounts are cumulative from 1 January up to the end of each
-- reported month, so adding every month of a year together would count January
-- up to twelve times. The yearly totals therefore use only the latest reported
-- month of each (cui, year, side); months_reported still counts every period.
--
-- NOTE(review): rows of all raport_tip values and all classification levels
-- are summed together; if the same money is reported both in a detailed and in
-- an aggregated report, it is still counted more than once — confirm against
-- the ingest.
-- IF NOT EXISTS does not redefine an already-created view: drop and recreate
-- it on databases where the earlier definition was applied.
CREATE MATERIALIZED VIEW IF NOT EXISTS bugetar.mv_per_cui_year AS
WITH flagged AS (
    SELECT
        cui,
        period_year,
        period,
        side,
        suma_executat,
        entity_name,
        judet,
        sector_bugetar,
        -- true only for rows belonging to the last month reported for this side
        (period_month = MAX(period_month) OVER (PARTITION BY cui, period_year, side)) AS is_latest_month
    FROM bugetar.executie
)
SELECT
    cui,
    period_year,
    SUM(suma_executat) FILTER (WHERE side = 'venituri' AND is_latest_month) AS venituri_total,
    SUM(suma_executat) FILTER (WHERE side = 'cheltuieli' AND is_latest_month) AS cheltuieli_total,
    COUNT(DISTINCT period) AS months_reported,
    MAX(entity_name) AS entity_name_sample,
    MAX(judet) AS judet,
    MAX(sector_bugetar) AS sector_bugetar
FROM flagged
GROUP BY
    cui,
    period_year;

-- Unique key on the view's grouping columns.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_bugetar_cui_year
    ON bugetar.mv_per_cui_year (cui, period_year);

-- Top spenders per year.
CREATE INDEX IF NOT EXISTS idx_mv_bugetar_year_chelt
    ON bugetar.mv_per_cui_year (period_year, cheltuieli_total DESC NULLS LAST);

COMMENT ON MATERIALIZED VIEW bugetar.mv_per_cui_year IS
    'Sumar venituri+cheltuieli per (CUI × an). Refresh după fiecare ingest batch.';
-- Summary per (CUI × year × capitol) — for analysing how money is distributed
-- across budget chapters.
--
-- bugetar.executie amounts are cumulative from 1 January, so only the latest
-- reported month of each (cui, year, capitol, side) contributes to suma_total;
-- summing every month would multiply-count the earlier months.
--
-- NOTE(review): capitol_label is simply the greatest classification_label in
-- the group, which may belong to a sub-level row rather than to the capitol
-- itself — confirm.
-- IF NOT EXISTS does not redefine an already-created view: drop and recreate
-- it on databases where the earlier definition was applied.
CREATE MATERIALIZED VIEW IF NOT EXISTS bugetar.mv_per_cui_capitol_year AS
WITH flagged AS (
    SELECT
        cui,
        period_year,
        capitol,
        side,
        suma_executat,
        classification_label,
        -- true only for rows belonging to the last month reported for this group
        (period_month = MAX(period_month) OVER (PARTITION BY cui, period_year, capitol, side)) AS is_latest_month
    FROM bugetar.executie
    WHERE capitol IS NOT NULL
)
SELECT
    cui,
    period_year,
    capitol,
    side,
    SUM(suma_executat) FILTER (WHERE is_latest_month) AS suma_total,
    MAX(classification_label) AS capitol_label
FROM flagged
GROUP BY
    cui,
    period_year,
    capitol,
    side;

-- Unique key on the view's grouping columns.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_bugetar_cui_cap_year
    ON bugetar.mv_per_cui_capitol_year (cui, period_year, capitol, side);

COMMENT ON MATERIALIZED VIEW bugetar.mv_per_cui_capitol_year IS
    'Sumar pe capitol bugetar per (CUI × an). Pentru analiza "pe ce s-au cheltuit banii".';

-- ────────────────────────────────────────────────────────────────────────────
-- COMMENTS & schema-level metadata
-- ────────────────────────────────────────────────────────────────────────────
COMMENT ON SCHEMA bugetar IS
    'Transparență Bugetară MFP — execuția bugetară lunară a entităților publice. Sursă: https://mfinante.gov.ro/apps/transparenta-bugetara/';
@@ -0,0 +1,15 @@
-- 027_afir_tip_fond.sql
-- Augment fonduri.afir_plati with tip_fond discriminator to host both FEADR
-- (project-based development funds) and FEGA (per-hectare direct payments
-- to farmers) in the same fact table. Schema is near-identical between the
-- two; only specific columns are populated per fund (e.g. fega_op vs feadr_op).
--
-- Backwards compatible: existing 1.04M rows (2023+2024 FEADR) get tip_fond='FEADR'.
-- Add the fund discriminator. When the column is created here, NOT NULL
-- DEFAULT 'FEADR' already fills every existing row.
ALTER TABLE fonduri.afir_plati
    ADD COLUMN IF NOT EXISTS tip_fond text NOT NULL DEFAULT 'FEADR';

-- Only does work when tip_fond already existed as a nullable column (in which
-- case the ADD COLUMN above was skipped): fill the gaps with the legacy value.
UPDATE fonduri.afir_plati
SET tip_fond = 'FEADR'
WHERE tip_fond IS NULL;

-- Converge a pre-existing column to the intended definition as well; both
-- settings are no-ops when the column was created by the statement above.
ALTER TABLE fonduri.afir_plati
    ALTER COLUMN tip_fond SET DEFAULT 'FEADR',
    ALTER COLUMN tip_fond SET NOT NULL;

CREATE INDEX IF NOT EXISTS idx_afir_plati_tip_fond
    ON fonduri.afir_plati (tip_fond, source_year);
+136
View File
@@ -0,0 +1,136 @@
-- 028_anre.sql
-- ANRE — Autoritatea Națională de Reglementare în domeniul Energiei.
-- Public license/authorization registries scraped from portal.anre.ro/PublicLists.
--
-- Sources (all return JSON via Kendo Grid AJAX endpoint, pageSize=99999 returns full):
-- 1. /PublicLists/LicenteAutorizatii → ~4,927 licenses (electricitate)
-- flat columns: Societate, Sediu, Localitate, Judet, NrLicenta, DataEmitere,
-- DataExpirare, Stare, TipAL, TipActivitate, Comentariu
-- 2. /PublicLists/LicenteAutorizatiiGN → ~353 licenses (gaze naturale)
-- parent row per company, "Detaliu" is HTML <table> with multiple sub-rows
-- (Nr.Document, Tip document, Tip activitate, Localitate, Data emitere,
-- Data expirare, Stare, Decizie)
-- 3. /PublicLists/Atestate → ~9,745 atestate
-- parent row per company, "Detaliu" HTML <table> w/ Nr.atestat, Tip tarif,
-- Data emitere, Data expirare, Stare
-- 4. /PublicLists/AutorizatiiElectricieniAutorizati → ~101,529 electricieni autorizati
-- flat: NumePrenume, NrRegistru, Localitate, Judet, NrAutorizare,
-- TarifAutorizare, TipAutorizare, DataExpirare, Stare
--
-- Cross-source value: anre.licente.titular_cui (resolved via firms.normalize_company_name
-- fuzzy match) × seap.announcements.supplier_cui = "energy operators with state contracts".
-- Red-flag: company wins energy-related SEAP contract but has no ANRE license.
CREATE SCHEMA IF NOT EXISTS anre;

-- ── 1. Licenses & authorizations (companies) — unified flat table ──────────
-- One row per distinct license document. license_source tells the three
-- corporate sources apart (electricitate / gaze / atestate). The "Detaliu"
-- sub-rows of the GN and atestate sources are flattened to one row each. The
-- source position (NrCrt) is kept in raw_json for traceability.
CREATE TABLE IF NOT EXISTS anre.licente (
    -- sha1 over license_source|license_no|titular_name|data_emitere plus the
    -- license type. NOTE(review): the original inline note names the last
    -- component tip_al while the column comment below names license_type —
    -- presumably the same field (source "TipAL"); confirm.
    id                char(40) PRIMARY KEY,
    license_source    text NOT NULL,   -- 'electricitate' | 'gaze' | 'atestat'
    license_no        text NOT NULL,   -- "NrLicenta" / "Nr. Document" / "Nr. atestat"
    license_type      text,            -- "Licenta" / "Autorizatie de Infiintare" / "Confirmare Licenta" / "Atestat"
    license_subtype   text,            -- "TipActivitate" / "Tip document" / "Tip tarif" (e.g. "Producere", "Furnizare", "Tarif A1")
    titular_name      text NOT NULL,   -- raw "Societate"
    titular_name_norm text,            -- firms.normalize_company_name(titular_name) — populated post-insert
    titular_cui       text,            -- resolved via fuzzy match (NULL initially)
    cui_match_score   numeric(4,3),
    cui_match_method  text,            -- 'exact_norm' / 'trgm_unique' / 'trgm_judet'
    matched_at        timestamptz,
    sediu             text,            -- address
    localitate        text,
    judet             text,
    telefon_fax       text,
    data_emitere      date,
    data_expirare     date,
    stare             text,            -- 'Acordata' / 'Expirata' / 'Retrasa' / 'Suspendata' / 'Incetat valabilitate, sub 1 MW' / etc.
    decizie           text,            -- "Nr.Dec. 2223" — GN only
    comentariu        text,            -- electricitate only
    raw_json          jsonb,
    fetched_at        timestamptz NOT NULL DEFAULT now()
);

-- Partial index: only licenses whose holder has been resolved to a CUI.
CREATE INDEX IF NOT EXISTS idx_anre_licente_titular_cui
    ON anre.licente (titular_cui)
    WHERE titular_cui IS NOT NULL;

-- Trigram index for fuzzy matching on the normalized holder name.
CREATE INDEX IF NOT EXISTS idx_anre_licente_titular_norm_trgm
    ON anre.licente
    USING gin (titular_name_norm gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_anre_licente_source_stare
    ON anre.licente (license_source, stare);

CREATE INDEX IF NOT EXISTS idx_anre_licente_data_expirare
    ON anre.licente (data_expirare);

CREATE INDEX IF NOT EXISTS idx_anre_licente_subtype
    ON anre.licente (license_subtype);

COMMENT ON TABLE anre.licente
    IS 'ANRE licenses & authorizations (electricitate + gaze + atestate). One row per distinct license document. Source: portal.anre.ro/PublicLists/{LicenteAutorizatii,LicenteAutorizatiiGN,Atestate}.';

COMMENT ON COLUMN anre.licente.id
    IS 'sha1(license_source|license_no|titular_name|data_emitere|license_type) — idempotent upsert key.';

COMMENT ON COLUMN anre.licente.license_source
    IS '"electricitate" / "gaze" / "atestat" — source registry.';
-- ── 2. Authorized electricians (individuals) ───────────────────────────────
-- People, not firms. There is no CUI; the rows are kept only to allow
-- lookups by name.
CREATE TABLE IF NOT EXISTS anre.electricieni (
    id             bigserial PRIMARY KEY,
    nume_prenume   text NOT NULL,
    nr_registru    integer,   -- "NrRegistru"
    -- "NrAutorizare" — described as the natural unique key.
    -- NOTE(review): the column is nullable and uniqueness is only enforced
    -- together with nume_prenume (see below) — confirm that is intended.
    nr_autorizare  integer,
    tip_autorizare text,      -- "Autorizare Electricieni"
    tarif          text,      -- "Tarif II B" / "Tarif IV"
    localitate     text,
    judet          text,
    telefon_fax    text,
    data_expirare  date,
    stare          text,      -- "Activ" / "Expirat" / "Retras"
    raw_json       jsonb,
    fetched_at     timestamptz NOT NULL DEFAULT now(),
    UNIQUE (nr_autorizare, nume_prenume)
);

CREATE INDEX IF NOT EXISTS idx_anre_electricieni_judet
    ON anre.electricieni (judet, stare);

-- Trigram index for fuzzy name lookups.
CREATE INDEX IF NOT EXISTS idx_anre_electricieni_nume_trgm
    ON anre.electricieni
    USING gin (nume_prenume gin_trgm_ops);

COMMENT ON TABLE anre.electricieni
    IS 'ANRE — electricieni autorizati (persoane fizice). Source: portal.anre.ro/PublicLists/AutorizatiiElectricieniAutorizati.';
-- ── 3. Scrape log (mirrors the aep.scrape_log convention) ──────────────────
-- One row per scraper run, with row counters and timing.
CREATE TABLE IF NOT EXISTS anre.scrape_log (
    id            bigserial PRIMARY KEY,
    scraper       text NOT NULL,                 -- 'electricitate' / 'gaze' / 'atestat' / 'electricieni'
    source_url    text NOT NULL,
    rows_seen     integer NOT NULL DEFAULT 0,
    rows_inserted integer NOT NULL DEFAULT 0,
    rows_updated  integer NOT NULL DEFAULT 0,
    rows_skipped  integer NOT NULL DEFAULT 0,
    duration_ms   integer NOT NULL DEFAULT 0,
    started_at    timestamptz NOT NULL,          -- must be supplied by the writer (no default)
    finished_at   timestamptz NOT NULL DEFAULT now(),
    error         text
);

-- Most recent runs first.
CREATE INDEX IF NOT EXISTS idx_anre_scrape_log_started
    ON anre.scrape_log (started_at DESC);
-- ── 4. Materialized view: per-CUI license rollup ───────────────────────────
-- Joinable with seap.announcements.supplier_cui to tell licensed from
-- unlicensed energy contractors. Only licenses already resolved to a CUI are
-- included. Status buckets are prefix matches on stare (case-insensitive).
CREATE MATERIALIZED VIEW IF NOT EXISTS anre.mv_licente_per_cui AS
SELECT
    titular_cui                                                      AS cui,
    COUNT(*)                                                         AS nr_licente_total,
    -- counts per source registry
    COUNT(*) FILTER (WHERE license_source = 'electricitate')        AS nr_electricitate,
    COUNT(*) FILTER (WHERE license_source = 'gaze')                 AS nr_gaze,
    COUNT(*) FILTER (WHERE license_source = 'atestat')              AS nr_atestate,
    -- counts per status bucket
    COUNT(*) FILTER (WHERE stare ILIKE 'Acord%' OR stare ILIKE 'Activ%')    AS nr_active,
    COUNT(*) FILTER (WHERE stare ILIKE 'Expir%')                    AS nr_expirate,
    COUNT(*) FILTER (WHERE stare ILIKE 'Retras%' OR stare ILIKE 'Suspend%') AS nr_retrase,
    -- distinct subtypes and sources seen for this CUI
    array_agg(DISTINCT license_subtype) FILTER (WHERE license_subtype IS NOT NULL) AS subtipuri,
    array_agg(DISTINCT license_source)                               AS surse,
    -- date range of the licenses
    MIN(data_emitere)                                                AS prima_emitere,
    MAX(data_emitere)                                                AS ultima_emitere,
    MAX(data_expirare)                                               AS ultima_expirare
FROM anre.licente
WHERE titular_cui IS NOT NULL
GROUP BY titular_cui;

-- Unique key on the view's grouping column.
CREATE UNIQUE INDEX IF NOT EXISTS idx_anre_mv_licente_per_cui
    ON anre.mv_licente_per_cui (cui);

COMMENT ON MATERIALIZED VIEW anre.mv_licente_per_cui
    IS 'Rollup of ANRE licenses per CUI. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY anre.mv_licente_per_cui';
+131
View File
@@ -0,0 +1,131 @@
-- 029_ancom.sql
-- ANCOM — Autoritatea Națională pentru Administrare și Reglementare în Comunicații.
-- Public registry of authorized providers of electronic communications networks
-- and services. Source:
-- https://www.ancom.ro/reglementare-ro/comunicatii-electronice/
-- furnizori-comunicatii-electronice/
-- lista-furnizorilor-de-retele-si-servicii-de-comunicatii-autorizati/
--
-- The list is paginated server-side (10 rows/page, ~57 pages → ~570 furnizori).
-- Each row links to a HTML detail page at:
-- https://www.ancom.ro/sablon/furnizorinew_23/?id={id}&pid=4186
--
-- The detail page exposes:
-- • Denumire (titular)
-- • Adresa, Oras/Comuna, Judet/Sector
-- • Cod unic de înregistrare (CUI) — direct, no fuzzy match needed
-- • EUID (Registrul Comerțului) — e.g. ROONRC.J16/3108/1992
-- • R1..R11 — tipuri de retele (Fire metalice, Coaxial, Fibra optica, Mobil,
-- Spectru radio, etc.) cu "Data nasterii dreptului"
-- • S1..S12 — tipuri de servicii (Internet la puncte fixe, Voce mobil,
-- Comunicații interpersonale, etc.)
--
-- Cross-source value:
-- ancom.operatori.titular_cui × seap.announcements.supplier_cui = furnizori
-- telco cu contracte publice. Inverse (anunturi telco CPV 32/64 cu supplier
-- NU în ancom.operatori) = potențial neautorizat.
--
-- Schema layout:
-- 1. ancom.operatori — flat row per provider (CUI direct from page)
-- 2. ancom.drepturi — long table: 1 row per (operator, R/S code)
-- cu data nasterii dreptului. Permite filtrare
-- pe tip retea/serviciu (R3=fibra optica etc.)
-- 3. ancom.scrape_log — mirrors anre.scrape_log convention
-- 4. ancom.mv_operatori_per_cui— rollup pentru join cu seap.announcements
CREATE SCHEMA IF NOT EXISTS ancom;

-- ── 1. Operators (authorized providers) ────────────────────────────────────
-- One row per ancom_id (the numeric registry id from sablon/furnizorinew_23).
-- ancom_id is the primary key because it is the registry's natural unique key.
CREATE TABLE IF NOT EXISTS ancom.operatori (
    ancom_id          integer PRIMARY KEY,               -- ?id={N} in the detail URL
    titular_name      text NOT NULL,                     -- raw, from the list table
    titular_name_norm text,                              -- firms.normalize_company_name() — fallback when the CUI is unmatched
    titular_cui       text,                              -- taken directly from the detail page, stored as a string ('3071154')
    cui_match_method  text,                              -- 'direct' (from page) | 'exact_norm' | 'trgm_unique' | 'trgm_judet'
    cui_match_score   numeric(4,3),
    matched_at        timestamptz,
    euid              text,                              -- 'ROONRC.J16/3108/1992' — trade registry identifier
    adresa            text,
    oras              text,
    judet             text,                              -- 'DOLJ', 'SECTOR 1', etc.
    list_judet        text,                              -- county as shown in the list (may differ from the detail page)
    detail_url        text NOT NULL,                     -- canonical URL
    status            text NOT NULL DEFAULT 'autorizat', -- 'autorizat' | 'radiat' | 'sanctionat' | 'inactiv'
    raw_html_hash     text,                              -- sha1 of the detail HTML body — change detection
    fetched_at        timestamptz NOT NULL DEFAULT now()
);

-- Partial index: only operators with a known CUI.
CREATE INDEX IF NOT EXISTS idx_ancom_operatori_cui
    ON ancom.operatori (titular_cui)
    WHERE titular_cui IS NOT NULL;

-- Trigram index for fuzzy matching on the normalized name.
CREATE INDEX IF NOT EXISTS idx_ancom_operatori_name_norm
    ON ancom.operatori
    USING gin (titular_name_norm gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_ancom_operatori_judet
    ON ancom.operatori (judet);

CREATE INDEX IF NOT EXISTS idx_ancom_operatori_status
    ON ancom.operatori (status);

COMMENT ON TABLE ancom.operatori
    IS 'ANCOM authorized communications providers. One row per ancom_id from registry. Source: ancom.ro/reglementare-ro/.../lista-furnizorilor-...autorizati/';

COMMENT ON COLUMN ancom.operatori.ancom_id
    IS 'Natural unique key from detail URL ?id={N}&pid=4186. Stable across scrapes.';

COMMENT ON COLUMN ancom.operatori.titular_cui
    IS 'CUI direct from detail page "Cod unic de înregistrare". Most rows match — fuzzy fallback used only when missing.';
-- ── 2. Rights (R1..R11 + S1..S12 catalog) ──────────────────────────────────
-- Long table — one row per (operator, code). It answers questions such as:
--   "how many providers hold an active S2 (mobile) right?"
--   "how many providers in Cluj hold R3 (optical fibre)?"
CREATE TABLE IF NOT EXISTS ancom.drepturi (
    -- Rights are removed together with their operator.
    ancom_id      integer NOT NULL
                  REFERENCES ancom.operatori (ancom_id) ON DELETE CASCADE,
    cod           text NOT NULL,   -- 'R1' .. 'R11' | 'S1' .. 'S12'
    tip           text NOT NULL,   -- 'retea' | 'serviciu'
    descriere     text,            -- 'Fire metalice (DSL)' / 'Internet la puncte fixe' / etc.
    data_nasterii date,            -- "Data nasterii dreptului" (date the right came into effect)
    PRIMARY KEY (ancom_id, cod)
);

CREATE INDEX IF NOT EXISTS idx_ancom_drepturi_cod
    ON ancom.drepturi (cod);

CREATE INDEX IF NOT EXISTS idx_ancom_drepturi_tip
    ON ancom.drepturi (tip);

COMMENT ON TABLE ancom.drepturi
    IS 'Drepturile fiecarui furnizor — R1..R11 (retele) + S1..S12 (servicii) cu data nasterii dreptului. Long table, one row per (operator, code).';
-- ── 3. Scrape log ──────────────────────────────────────────────────────────
-- One row per scraper run, with row counters and timing.
CREATE TABLE IF NOT EXISTS ancom.scrape_log (
    id            bigserial PRIMARY KEY,
    scraper       text NOT NULL,                 -- 'autorizati' | 'radiati' | 'sanctionati'
    source_url    text NOT NULL,
    rows_seen     integer NOT NULL DEFAULT 0,
    rows_inserted integer NOT NULL DEFAULT 0,
    rows_updated  integer NOT NULL DEFAULT 0,
    rows_skipped  integer NOT NULL DEFAULT 0,
    duration_ms   integer NOT NULL DEFAULT 0,
    started_at    timestamptz NOT NULL,          -- must be supplied by the writer (no default)
    finished_at   timestamptz NOT NULL DEFAULT now(),
    error         text
);

-- Most recent runs first.
CREATE INDEX IF NOT EXISTS idx_ancom_scrape_log_started
    ON ancom.scrape_log (started_at DESC);
-- ── 4. Per-CUI rollup (joinable with seap.announcements.supplier_cui) ──────
-- The LEFT JOIN yields one row per (operator, right), or a single row with
-- NULL right columns for an operator that has no rights recorded. Hence:
--   * nr_autorizatii counts DISTINCT operators — a plain COUNT(*) would count
--     the joined rows, i.e. grow with the number of rights;
--   * the are_* flags are wrapped in COALESCE so that an operator without any
--     rights rows reports false instead of NULL.
-- IF NOT EXISTS does not redefine an already-created view: drop and recreate
-- it on databases where the earlier definition was applied.
CREATE MATERIALIZED VIEW IF NOT EXISTS ancom.mv_operatori_per_cui AS
SELECT
    o.titular_cui                                             AS cui,
    COUNT(DISTINCT o.ancom_id)                                AS nr_autorizatii,
    array_agg(DISTINCT o.ancom_id ORDER BY o.ancom_id)        AS ancom_ids,
    array_agg(DISTINCT d.cod) FILTER (WHERE d.tip = 'retea')    AS retele,
    array_agg(DISTINCT d.cod) FILTER (WHERE d.tip = 'serviciu') AS servicii,
    COALESCE(bool_or(d.cod = 'S1'), false)                    AS are_internet_fix,
    COALESCE(bool_or(d.cod = 'S2'), false)                    AS are_mobil,
    COALESCE(bool_or(d.cod = 'R3'), false)                    AS are_fibra,
    bool_or(o.status = 'autorizat')                           AS are_status_activ,
    MIN(d.data_nasterii)                                      AS prima_autorizare,
    MAX(d.data_nasterii)                                      AS ultima_autorizare,
    MAX(o.fetched_at)                                         AS ultima_actualizare
FROM ancom.operatori AS o
LEFT JOIN ancom.drepturi AS d
    ON d.ancom_id = o.ancom_id
WHERE o.titular_cui IS NOT NULL
GROUP BY o.titular_cui;

-- Unique key on the view's grouping column.
CREATE UNIQUE INDEX IF NOT EXISTS idx_ancom_mv_per_cui
    ON ancom.mv_operatori_per_cui (cui);

COMMENT ON MATERIALIZED VIEW ancom.mv_operatori_per_cui IS
    'Rollup ANCOM per CUI (autorizatii + tipuri de retele/servicii). Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY ancom.mv_operatori_per_cui';
@@ -0,0 +1,211 @@
-- 030_ani_schema.sql
-- ANI declarații de avere și interese — flagship transparency feature.
--
-- Source: declaratii.integritate.eu (e-DAI 2022→), old-declaratii.integritate.eu
-- (archive 2008-2022). Public by Law 176/2010, GDPR-safe (no CNP stored).
--
-- ~1.3M PDF declarations of Romanian public officials. Cross-references
-- politicians × firms-they-own × procurement-contracts (firms.entities, seap.*).
--
-- See ANI-PLAN.md for full architecture, volume estimates, and rollout plan.
-- This file = Stage 0 (schema only, no data).
CREATE SCHEMA IF NOT EXISTS ani;
GRANT USAGE ON SCHEMA ani TO PUBLIC;

-- ── ani.officials ──────────────────────────────────────────────────────────
-- One row per distinct dignitary / public servant. Filled by Stage 4 (entity
-- resolution), not by the listing scraper; ani.declaratii.official_id stays
-- NULL until that dedup step has run.
CREATE TABLE IF NOT EXISTS ani.officials (
    id                bigserial PRIMARY KEY,
    normalized_name   text NOT NULL,             -- lower(unaccent(name)), collapsed
    display_name      text NOT NULL,             -- "Popescu Ioan-Vasile"
    cnp_hash          char(64),                  -- SHA-256, if extractable (rare)
    first_seen_year   smallint,                  -- min(declaration year)
    last_seen_year    smallint,                  -- max(declaration year)
    slug              text UNIQUE,               -- "popescu-ioan-vasile" + suffix
    primary_function  text,                      -- most frequent function
    primary_judet     text,                      -- most frequent county
    declaration_count integer DEFAULT 0,         -- materialized count for the UI
    created_at        timestamptz DEFAULT now()
);

-- Exact lookups on the normalized name.
CREATE INDEX IF NOT EXISTS idx_officials_norm_name
    ON ani.officials (normalized_name);

-- Trigram index for fuzzy lookups on the normalized name.
CREATE INDEX IF NOT EXISTS idx_officials_norm_name_trgm
    ON ani.officials
    USING gin (normalized_name gin_trgm_ops);
-- ── ani.declaratii ─────────────────────────────────────────────────────────
-- One row per PDF declaration. The listing scraper fills the metadata, the PDF
-- downloader fills pdf_path + pdf_sha256, and the parser fills parse_status.
CREATE TABLE IF NOT EXISTS ani.declaratii (
    id                bigserial PRIMARY KEY,
    -- Deleting an official detaches the declaration instead of deleting it.
    official_id       bigint REFERENCES ani.officials (id) ON DELETE SET NULL,

    -- Raw fields straight from the portal listing (pre-resolution)
    raw_official_name text NOT NULL,
    raw_institution   text,
    raw_function      text,
    raw_localitate    text,
    raw_judet         text,

    -- Declaration details
    year              smallint NOT NULL,
    declaration_type  text NOT NULL
                      CHECK (declaration_type IN ('avere', 'interese', 'avere+interese')),
    submission_kind   text
                      CHECK (submission_kind IN
                                 ('anuala', 'numire-functie', 'incetare-functie',
                                  'rectificativa', 'periodica', 'altele') OR
                             submission_kind IS NULL),
    data_completare   date,

    -- Source tracking (which portal, which id)
    source_portal     text NOT NULL
                      CHECK (source_portal IN ('old', 'new', 'depozitar')),
    source_url        text NOT NULL,
    source_id         text,            -- uniqueIdentifier (old) / _id (new)

    -- PDF storage
    pdf_path          text,            -- relative to /opt/vreaudigital-data/ani
    pdf_sha256        char(64),
    pdf_size_bytes    integer,
    fetched_at        timestamptz,

    -- Parser state
    parsed_at         timestamptz,
    parse_status      text DEFAULT 'pending'
                      CHECK (parse_status IN
                                 ('pending', 'ok', 'ocr_required', 'parse_failed',
                                  'template_unknown', 'download_failed')),
    parse_template    text,            -- '2008-2010' | '2011-2016' | '2017+' | 'edai'
    parse_error       text,
    inserted_at       timestamptz DEFAULT now()
);

-- Primary dedup key: one declaration per (portal, source_id), enforced only
-- for rows that have a source_id.
CREATE UNIQUE INDEX IF NOT EXISTS idx_declaratii_source
    ON ani.declaratii (source_portal, source_id)
    WHERE source_id IS NOT NULL;

-- Content-hash dedup: the same PDF re-uploaded under a different id.
CREATE UNIQUE INDEX IF NOT EXISTS idx_declaratii_sha
    ON ani.declaratii (pdf_sha256)
    WHERE pdf_sha256 IS NOT NULL;

-- Declarations of a resolved official, newest year first.
CREATE INDEX IF NOT EXISTS idx_declaratii_official
    ON ani.declaratii (official_id, year DESC)
    WHERE official_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_declaratii_year
    ON ani.declaratii (year DESC, declaration_type);

-- Partial index covering only rows still waiting for parsing or OCR.
CREATE INDEX IF NOT EXISTS idx_declaratii_pending
    ON ani.declaratii (parse_status)
    WHERE parse_status IN ('pending','ocr_required');

-- Trigram indexes for fuzzy search on the raw name and institution.
CREATE INDEX IF NOT EXISTS idx_declaratii_raw_name_trgm
    ON ani.declaratii
    USING gin (raw_official_name gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_declaratii_raw_inst_trgm
    ON ani.declaratii
    USING gin (raw_institution gin_trgm_ops);
-- ── ani.bunuri ─────────────────────────────────────────────────────────────
-- Sections I (real estate) + II (movable assets). raw_row_text is always
-- preserved for audit / debugging.
CREATE TABLE IF NOT EXISTS ani.bunuri (
    id             bigserial PRIMARY KEY,
    -- Assets are removed together with their declaration.
    declaration_id bigint NOT NULL
                   REFERENCES ani.declaratii (id) ON DELETE CASCADE,
    category       text NOT NULL
                   CHECK (category IN
                              ('imobil-teren', 'imobil-cladire', 'mobil-vehicul',
                               'mobil-bijuterii', 'mobil-tablouri-arta', 'mobil-altele')),
    subcategory    text,                    -- "agricol"/"intravilan"/"apartament"
    localitate     text,
    judet          text,
    tara           text DEFAULT 'România',
    year_acquired  smallint,
    mode_acquired  text,                    -- "cumparare"/"mostenire"/"donatie"
    area_sqm       numeric,
    share_pct      numeric,                 -- 1.0 = full ownership (a fraction, despite the _pct name)
    co_owner       text,
    value_lei      numeric,
    value_currency text DEFAULT 'RON',
    raw_row_text   text                     -- audit
);

CREATE INDEX IF NOT EXISTS idx_bunuri_decl
    ON ani.bunuri (declaration_id);

-- Partial index: only assets with a known county.
CREATE INDEX IF NOT EXISTS idx_bunuri_judet
    ON ani.bunuri (judet)
    WHERE judet IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_bunuri_category
    ON ani.bunuri (category);
-- ── ani.shareholdings ──────────────────────────────────────────────────────
-- Section IX (firms owned / associated with). THE flagship table — it joins to
-- firms.entities via firm_cui (resolved in Stage 4) and, through that CUI, to
-- seap.announcements for "politician-with-firm-supplier-to-state" recipes.
CREATE TABLE IF NOT EXISTS ani.shareholdings (
    id                bigserial PRIMARY KEY,
    -- Shareholdings are removed together with their declaration.
    declaration_id    bigint NOT NULL
                      REFERENCES ani.declaratii (id) ON DELETE CASCADE,
    firm_name_raw     text NOT NULL,        -- text from the PDF
    firm_cui          text,                 -- resolved later
    firm_match_score  real,                 -- pg_trgm similarity
    firm_match_method text
                      CHECK (firm_match_method IN
                                 ('exact_name', 'trgm', 'manual', 'unmatched') OR
                             firm_match_method IS NULL),
    matched_at        timestamptz,
    role              text,                 -- "actionar"/"asociat"/"administrator"/"membru CA"
    share_pct         numeric,
    value_lei         numeric,
    -- Category values are lowercase. 'ong' is the intended spelling; the
    -- legacy literal 'oNG' (a casing typo in the first version of this check)
    -- is still accepted so that existing writers/rows using it keep validating.
    category          text
                      CHECK (category IN
                                 ('societate', 'asociatie', 'fundatie', 'cooperativa',
                                  'ong', 'oNG', 'altele') OR
                             category IS NULL),
    raw_row_text      text
);

CREATE INDEX IF NOT EXISTS idx_share_decl
    ON ani.shareholdings (declaration_id);

-- Partial index: only shareholdings already resolved to a CUI.
CREATE INDEX IF NOT EXISTS idx_share_cui
    ON ani.shareholdings (firm_cui)
    WHERE firm_cui IS NOT NULL;

-- Trigram index for fuzzy matching on the raw firm name.
CREATE INDEX IF NOT EXISTS idx_share_name_trgm
    ON ani.shareholdings
    USING gin (firm_name_raw gin_trgm_ops);

-- Work queue for the matcher: rows never matched or explicitly unmatched.
CREATE INDEX IF NOT EXISTS idx_share_unmatched
    ON ani.shareholdings (firm_match_method)
    WHERE firm_match_method IS NULL OR firm_match_method = 'unmatched';
-- ── ani.functii ────────────────────────────────────────────────────────────
-- Section VIII — public and private positions held. Joinable to
-- seap.cui_authority (when is_public and institution_cui matches an authority)
-- and to firms.entities (when is_public = false).
CREATE TABLE IF NOT EXISTS ani.functii (
    id               bigserial PRIMARY KEY,
    -- Positions are removed together with their declaration.
    declaration_id   bigint NOT NULL
                     REFERENCES ani.declaratii (id) ON DELETE CASCADE,
    is_public        boolean,
    function_name    text NOT NULL,
    institution_name text NOT NULL,
    institution_cui  text,        -- resolved later
    start_year       smallint,
    end_year         smallint,    -- NULL if still active
    salary_lei       numeric,     -- annual income from this position
    raw_row_text     text
);

CREATE INDEX IF NOT EXISTS idx_functii_decl
    ON ani.functii (declaration_id);

-- Partial index: only positions whose institution has a resolved CUI.
CREATE INDEX IF NOT EXISTS idx_functii_inst_cui
    ON ani.functii (institution_cui)
    WHERE institution_cui IS NOT NULL;

-- Trigram index for fuzzy matching on the institution name.
CREATE INDEX IF NOT EXISTS idx_functii_inst_name_trgm
    ON ani.functii
    USING gin (institution_name gin_trgm_ops);
-- ── ani.donatii ────────────────────────────────────────────────────────────
-- Section V (donations received).
CREATE TABLE IF NOT EXISTS ani.donatii (
    id             bigserial PRIMARY KEY,
    -- Donations are removed together with their declaration.
    declaration_id bigint NOT NULL
                   REFERENCES ani.declaratii (id) ON DELETE CASCADE,
    donor_name     text,
    donation_type  text
                   CHECK (donation_type IN
                              ('bani', 'imobil', 'mobil', 'servicii', 'altele') OR
                          donation_type IS NULL),
    value_lei      numeric,
    currency       text DEFAULT 'RON',
    year_received  smallint,
    raw_row_text   text
);

CREATE INDEX IF NOT EXISTS idx_donatii_decl
    ON ani.donatii (declaration_id);
-- ── Catalog comments for the ani schema ───────────────────────────────────
COMMENT ON SCHEMA ani
    IS 'ANI declarații de avere și interese. Sources: declaratii.integritate.eu + old-declaratii.integritate.eu. Public by Law 176/2010.';

COMMENT ON TABLE ani.declaratii
    IS 'One row per PDF declaration. official_id resolved in Stage 4 dedup.';

COMMENT ON TABLE ani.shareholdings
    IS 'Section IX firme deținute. THE flagship cross-reference: firm_cui joins to firms.entities → seap.announcements.';

COMMENT ON COLUMN ani.declaratii.pdf_path
    IS 'Relative path under /opt/vreaudigital-data/ani/. Full path: /opt/vreaudigital-data/ani/$pdf_path';
+197
View File
@@ -0,0 +1,197 @@
-- 031_cnas.sql
-- CNAS — Casa Națională de Asigurări de Sănătate.
-- Lista furnizorilor de servicii medicale aflați în relație contractuală cu CAS-urile județene.
--
-- ───────────────────────────────────────────────────────────────────────────
-- Source landscape (investigated 2026-05-10):
-- ───────────────────────────────────────────────────────────────────────────
-- The CNAS source ecosystem is in active migration. There are 3 layers:
--
-- 1. cnas.ro/wp-content/uploads/ — central WordPress media library, 4.18K
-- files total. ~70-90 are "furnizori" PDFs (national + per-județ snapshots).
-- Discoverable via /wp-json/wp/v2/media REST API. 99% PDF, ~5 XLSX.
-- → THIS IS THE PRIMARY SOURCE for v1.
--
-- 2. cas.cnas.ro/casXX — new Angular SPA per județ (42 sub-instances). The
-- backend admin/api is a Blazor app at /admin/api/* with X-Instance-Key
-- header routing. As of 2026-05, most endpoints return [] or 500. The data
-- hasn't been migrated to the new infra yet.
-- → DEFERRED — track via watch script, ingest when populated.
--
-- 3. www.cnas.ro/casXX/page/lista-furnizori-*.html — old pre-migration WP.
-- 301-redirects to cnas.ro/casXX/ which is now a stub. Effectively dead
-- for our purposes; some files survive in CKEditor uploads on the old
-- cas.cnas.ro/casXX/theme/cnas/js/ckeditor/filemanager/userfiles/...
-- → DEFERRED — best harvested via Wayback CDX.
--
-- ───────────────────────────────────────────────────────────────────────────
-- File naming convention (cnas.ro/wp-content/uploads/):
-- ───────────────────────────────────────────────────────────────────────────
-- Most files DON'T contain CUI codes. They contain provider names + sediu +
-- contact data. CUI matching is post-ingest via firms.normalize_company_name
-- + trgm fuzzy search (mirroring anre.licente pattern).
--
-- Filename signals tip_serviciu:
-- FURNIZORI-SPITALE-IN-CONTRACT-2024.pdf → tip='spital'
-- FURNIZORI-IN-CONTRACT-MEDICINA-DE-FAMILIE.. → tip='medicina_familie'
-- FURNIZORI-DE-SERVICII-FARMACEUTICE-.. → tip='farmacie'
-- FURNIZORI-DISPOZITIVE-MEDICALE-.. → tip='dispozitive_medicale'
-- FURNIZORI-MEDICINA-DENTARA-.. → tip='medicina_dentara'
-- FURNIZORI-INGRIJIRI-MEDICALE-.. → tip='ingrijiri_medicale'
-- FURNIZORI-INGRIJIRI-PALIATIVE-.. → tip='ingrijiri_paliative'
-- FURNIZORI-RECUPERARE-MEDICALA-.. → tip='recuperare_medicala'
-- FURNIZORI-PNS-.. → tip='pns' (programe nationale)
-- FURNIZORI-IN-CONTRACT-AMBULATORIU-CLINIC-.. → tip='ambulatoriu_clinic'
-- FURNIZORI-IN-CONTRACT-AMBULATORIU-PARACLIN.. → tip='paraclinic'
-- FURNIZORI-..-URGENTA-PRESPITALICEASCA-.. → tip='urgenta_transport'
-- Lista-furnizori-clinice-.. → tip='clinic'
-- Lista-furnizori-RECA-.. → tip='recuperare_a'
-- Lista-furnizori-radioterapie-.. → tip='radioterapie'
-- Lista-furnizori-testare-genetica-.. → tip='testare_genetica'
-- Lista-furnizori-tumori-solide-maligne-.. → tip='oncologie'
--
-- Județ extraction is heuristic: from filename (e.g. CAS-GORJ, CAS-ARAD) OR
-- from PDF page header ("CASA DE ASIGURARI A JUDETULUI GORJ"). When both fail,
-- it's a national list (rare — most centrally-uploaded files are actually
-- per-județ, as the PDFs are produced by individual CAS-uri).
--
-- ───────────────────────────────────────────────────────────────────────────
-- Cross-source value:
-- ───────────────────────────────────────────────────────────────────────────
-- 1. cnas.furnizori.cui (resolved post-ingest) × seap.announcements.supplier_cui
-- @ CPV 33.* (medical equipment) / 85.* (medical services) =
-- "medical providers winning state contracts directly + via insurance"
-- 2. cnas.furnizori.cui × anaf_datornici.datornic.cui = "spitale & clinici cu
-- datorii la stat" — red-flag pattern.
-- 3. cnas.furnizori.cui × fonduri.proiect.beneficiar_cui (POIM-Sănătate) =
-- EU-funded healthcare providers.
CREATE SCHEMA IF NOT EXISTS cnas;

-- ── 1. Documents (file metadata catalog) ───────────────────────────────────
-- Tracks every PDF/XLSX harvested from cnas.ro WP media. One row per file URL
-- (source_url is UNIQUE).
-- Idempotent re-fetch: same URL → UPDATE fetched_at + parsed_at.
CREATE TABLE IF NOT EXISTS cnas.documents (
    id              bigserial PRIMARY KEY,
    source_url      text NOT NULL UNIQUE,
    source          text NOT NULL,                  -- 'wp-media' | 'cas-cnas-spa' | 'wayback' | 'manual'
    wp_media_id     bigint,                         -- /wp-json/wp/v2/media id (when source='wp-media')
    title           text,
    filename        text,
    mime_type       text,
    file_size_bytes integer,
    file_sha1       char(40),                       -- of downloaded bytes (used to detect changes)
    published_at    timestamptz,                    -- WP "date" (publication on CNAS site)
    -- Inferred classification (heuristic from filename + content):
    tip_serviciu    text,                           -- 'spital' / 'farmacie' / 'medicina_familie' / etc.; full list in the column COMMENT below
    judet           text,                           -- "GORJ" / "BIHOR" / NULL when national or unknown
    perioada        text,                           -- raw period from filename ("01.07.2024", "decembrie-2024")
    page_count      integer,                        -- for PDFs, post-parse
    parse_status    text NOT NULL DEFAULT 'pending',-- 'pending' / 'ok' / 'failed' / 'unsupported_format' / 'no_table'
    parse_error     text,
    rows_extracted  integer NOT NULL DEFAULT 0,
    fetched_at      timestamptz NOT NULL DEFAULT now(),
    parsed_at       timestamptz
);

-- Filter indexes for the classification columns and the parser work queue.
CREATE INDEX IF NOT EXISTS idx_cnas_documents_tip ON cnas.documents(tip_serviciu);
CREATE INDEX IF NOT EXISTS idx_cnas_documents_judet ON cnas.documents(judet);
CREATE INDEX IF NOT EXISTS idx_cnas_documents_published ON cnas.documents(published_at DESC);
CREATE INDEX IF NOT EXISTS idx_cnas_documents_parse_status ON cnas.documents(parse_status);

COMMENT ON TABLE cnas.documents IS
'Catalog of every CNAS provider-list document (PDF/XLSX) harvested. One row per source URL. Source: cnas.ro/wp-json/wp/v2/media + future SPA endpoints.';

-- The value list must stay in sync with the filename → tip mapping in the
-- file header; 'recuperare_a' (Lista-furnizori-RECA-..) was missing here.
COMMENT ON COLUMN cnas.documents.tip_serviciu IS
'Service category inferred from filename: spital, farmacie, medicina_familie, medicina_dentara, dispozitive_medicale, ambulatoriu_clinic, paraclinic, ingrijiri_medicale, ingrijiri_paliative, recuperare_medicala, recuperare_a, urgenta_transport, pns, clinic, oncologie, testare_genetica, radioterapie, other.';
-- ── 2. Furnizori (extracted provider records) ──────────────────────────────
-- One row per (document, NR_CRT, name) — see the UNIQUE constraint at the end
-- of the table. Provider name is the fundamental key — CUI is resolved
-- POST-INSERT via firms.normalize_company_name fuzzy match (mirror of
-- anre.licente pattern).
--
-- We allow the same legal entity (same CUI) to appear MULTIPLE times across
-- documents (e.g. same hospital listed in spitale + paraclinic + clinic lists).
-- Dedup is via the cnas.mv_furnizori_per_cui rollup, not at insert time.
CREATE TABLE IF NOT EXISTS cnas.furnizori (
id bigserial PRIMARY KEY,
document_id bigint NOT NULL REFERENCES cnas.documents(id) ON DELETE CASCADE,
-- Document context (denormalized for fast filtering):
tip_serviciu text, -- inherited from document
judet text, -- inherited from document (or row-level when available)
perioada text, -- "la 01.07.2024" etc.
-- Source row data:
nr_crt integer, -- in-document index (1..N); nullable
cod_furnizor text, -- "BH01" / "CT12" — CAS-internal supplier code (when present)
name text NOT NULL, -- raw from document ("Spitalul Clinic Județean Oradea", "DR.HEIM HERMINA")
name_norm text, -- firms.normalize_company_name — populated post-insert
reprezentant text, -- legal rep / cabinet doctor (for medicina familie, dentara)
sediu text, -- street address
localitate text,
telefon text,
fax text,
email text,
specialitate text, -- for ambulatoriu, paraclinic, clinic lists
-- CUI matching (resolved post-ingest):
cui text,
cui_match_score numeric(4,3),
cui_match_method text, -- 'exact_norm' / 'trgm_unique' / 'trgm_judet' / 'manual'
matched_at timestamptz,
raw_text text, -- the raw text-row from PDF for debugging
fetched_at timestamptz NOT NULL DEFAULT now(),
-- NOTE(review): nr_crt is nullable, and PostgreSQL UNIQUE constraints treat
-- NULLs as distinct by default, so rows with NULL nr_crt are never deduplicated
-- by this key — confirm the parser always sets nr_crt, or that re-parses
-- delete the document's rows first.
UNIQUE (document_id, nr_crt, name)
);
-- Partial index: only rows whose CUI has been resolved.
CREATE INDEX IF NOT EXISTS idx_cnas_furn_cui ON cnas.furnizori(cui) WHERE cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnas_furn_judet_tip ON cnas.furnizori(judet, tip_serviciu);
-- Trigram index on the normalized name, for the fuzzy CUI-matching pass.
CREATE INDEX IF NOT EXISTS idx_cnas_furn_name_trgm ON cnas.furnizori USING gin (name_norm gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_document ON cnas.furnizori(document_id);
CREATE INDEX IF NOT EXISTS idx_cnas_furn_localitate ON cnas.furnizori(localitate) WHERE localitate IS NOT NULL;
COMMENT ON TABLE cnas.furnizori IS
'Furnizori de servicii medicale extrași din PDF-urile CNAS. One row per (document, nr_crt, name). CUI resolved post-insert via firms.normalize_company_name + trgm fuzzy.';
COMMENT ON COLUMN cnas.furnizori.cod_furnizor IS
'Internal CAS supplier code, e.g. "BH01" (Bihor seq 01), "MB427" (București-Ilfov seq 427). NOT a CUI.';
COMMENT ON COLUMN cnas.furnizori.cui IS
'Resolved CUI via firms.normalize_company_name + pg_trgm match. NULL = unmatched; cabinete medicale individuale (CMI) often have no CUI in firms registry.';
-- ── 3. Scrape log ─────────────────────────────────────────────────────────
-- One row per scraper run, following the anre.scrape_log convention.
CREATE TABLE IF NOT EXISTS cnas.scrape_log (
    id            BIGSERIAL   PRIMARY KEY,
    -- Which pipeline step ran:
    -- 'wp-media-list' / 'parse-pdf' / 'match-cui' / 'cas-cnas-spa'
    scraper       TEXT        NOT NULL,
    source_url    TEXT,
    -- Per-run row counters and wall-clock duration.
    rows_seen     INTEGER     NOT NULL DEFAULT 0,
    rows_inserted INTEGER     NOT NULL DEFAULT 0,
    rows_updated  INTEGER     NOT NULL DEFAULT 0,
    rows_skipped  INTEGER     NOT NULL DEFAULT 0,
    duration_ms   INTEGER     NOT NULL DEFAULT 0,
    -- started_at must be supplied by the caller; finished_at defaults to
    -- the time the row is inserted.
    started_at    TIMESTAMPTZ NOT NULL,
    finished_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    error         TEXT
);

-- Newest runs first.
CREATE INDEX IF NOT EXISTS idx_cnas_scrape_log_started
    ON cnas.scrape_log (started_at DESC);
-- ── 4. Materialized view: per-CUI provider rollup ─────────────────────────
-- Joinable with seap.announcements + anaf_datornici + fonduri.proiect for
-- cross-source detection. Refreshed via cron after match-cui pass.
-- Only rows with a resolved CUI are included; one output row per CUI.
CREATE MATERIALIZED VIEW IF NOT EXISTS cnas.mv_furnizori_per_cui AS
SELECT
    cui,
    COUNT(*) AS nr_aparitii, -- across all lists
    COUNT(DISTINCT tip_serviciu) FILTER (WHERE tip_serviciu IS NOT NULL) AS nr_tipuri_serviciu,
    COUNT(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS nr_judete,
    array_agg(DISTINCT tip_serviciu) FILTER (WHERE tip_serviciu IS NOT NULL) AS tipuri_serviciu,
    array_agg(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS judete,
    -- One representative name: the most common spelling for this CUI.
    -- The previous expression, (array_agg(name ORDER BY length(name)))[1],
    -- returned the SHORTEST name and was arbitrary among equal lengths.
    -- mode() picks the most frequent value; on ties it takes the first in
    -- ORDER BY name order.
    mode() WITHIN GROUP (ORDER BY name) AS name_sample,
    MIN(fetched_at) AS first_seen,
    MAX(fetched_at) AS last_seen
FROM cnas.furnizori
WHERE cui IS NOT NULL
GROUP BY cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnas_mv_per_cui ON cnas.mv_furnizori_per_cui(cui);

COMMENT ON MATERIALIZED VIEW cnas.mv_furnizori_per_cui IS
'Per-CUI rollup of CNAS provider appearances. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnas.mv_furnizori_per_cui;';
+118
View File
@@ -0,0 +1,118 @@
-- 032_aaas.sql
-- AAAS — Autoritatea pentru Administrarea Activelor Statului.
-- Manages the state's residual ownership in privatized firms + collects
-- post-privatization debts. Tagging firms with "state still owns" /
-- "owes state money" / "post-priv investment obligation" is rare and powerful.
--
-- Sources investigated 2026-05-10:
-- 1. https://www.aaas.gov.ro/despre-aaas/1-9-guvernanta-corporativa-aaas/
-- 1-9-3-companii-sub-autoritatea-aaas/
-- → 12 named active-portfolio companies; each has a clean subpage with
-- CUI, J-number, address, phone, web, email, AAAS share %.
-- 2. https://www.aaas.gov.ro/4-oferta-a-a-a-s/4-2-vanzari-actiuni/
-- → "SECȚIUNE ÎN CONSTRUCȚIE" — only EXPO PARC SRL Iași as PDF teaser.
-- 3. https://www.aaas.gov.ro/4-oferta-a-a-a-s/4-3-valorificare-creante/
-- → "SECȚIUNE ÎN CONSTRUCȚIE" — debt list not published structured online.
-- 4. https://gwp.aaas.gov.ro/Directia-creante
-- → Login-gated services portal; no anonymous CUI/debtor lookup.
--
-- This schema is intentionally narrow: 12-15 confirmed CUIs is small but
-- HIGH SIGNAL — every firms profile that joins back here gets "STAT DEȚINE
-- ACȚIUNI" tag. Future passes can ingest historical lists (e.g. ORDIN
-- 278/2005 PDF — 800+ commercial companies × 41 counties as legacy snapshot).
--
-- Cross-source value:
-- aaas.firme.cui × seap.announcements.supplier_cui
-- = "Companies in active state portfolio winning more state contracts"
-- aaas.firme.cui × anaf.datornici
-- = "State-owned company that itself owes the state money"
-- aaas.firme.cui × firms.financials
-- = "How is the residual state-owned portfolio actually performing?"
CREATE SCHEMA IF NOT EXISTS aaas;
-- ── 1. Companies under AAAS authority / monitored by AAAS ──────────────────
-- One row per company, keyed by CUI (the PRIMARY KEY). aaas_status captures
-- the AAAS relationship type; it is free text here — the allowed values are
-- documented in the column comment, not enforced by a CHECK constraint.
-- Re-runs UPSERT on cui (last_action / state_share_pct can change).
-- Original AAAS subpage URL preserved in source_url for traceability.
CREATE TABLE IF NOT EXISTS aaas.firme (
cui text PRIMARY KEY,
name text NOT NULL, -- raw name as published by AAAS
name_normalized text, -- firms.normalize_company_name(name)
reg_number text, -- "J40/8215/2020"
aaas_status text NOT NULL, -- 'active_holding' | 'post_priv_debt' | 'insolventa' | 'recuperare' | 'vanzare_actiuni' | 'vanzare_creante'
state_share_pct numeric(6,3), -- "Participatie AAAS: 100%"
debt_to_state_lei numeric(20,2), -- only when AAAS publishes structured amount
last_action text, -- short description of last AAAS action (free-form)
last_action_date date,
address text, -- "Adresa: ..." line
phone text,
email text,
website text,
source_url text NOT NULL, -- specific AAAS subpage
notes text,
raw jsonb, -- full parsed key/value bag
cui_match_score numeric(4,3), -- only if matched via fuzzy (NULL if AAAS itself published the CUI)
cui_match_method text, -- 'aaas_published' | 'exact_norm' | 'trgm_unique'
fetched_at timestamptz NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_aaas_firme_status ON aaas.firme(aaas_status);
-- Descending with NULLs last: largest known debts / shares sort first.
CREATE INDEX IF NOT EXISTS idx_aaas_firme_debt ON aaas.firme(debt_to_state_lei DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_aaas_firme_share_pct ON aaas.firme(state_share_pct DESC NULLS LAST);
-- Trigram index on the normalized name, for fuzzy name lookups.
CREATE INDEX IF NOT EXISTS idx_aaas_firme_name_trgm ON aaas.firme USING gin (name_normalized gin_trgm_ops);
-- Catalog comments. Adjacent string literals separated by a newline are
-- concatenated by PostgreSQL into a single comment string.
COMMENT ON TABLE aaas.firme IS
'Firme aflate sub autoritatea AAAS / monitorizate de AAAS. PK = CUI. '
'Sursă primară: aaas.gov.ro subpages 1.9.3 + 4.2 + 4.3 (HTML scrape).';
COMMENT ON COLUMN aaas.firme.aaas_status IS
'active_holding = AAAS deține pachet de acțiuni; '
'post_priv_debt = obligații post-privatizare neîndeplinite; '
'insolventa = în procedură de insolvență administrată de AAAS; '
'recuperare = creanță în recuperare; '
'vanzare_actiuni = ofertă de vânzare acțiuni publicată; '
'vanzare_creante = ofertă de vânzare creanță publicată.';
COMMENT ON COLUMN aaas.firme.state_share_pct IS
'Procent de participație AAAS în acționariat. 100 = stat unic acționar.';
COMMENT ON COLUMN aaas.firme.cui_match_method IS
'aaas_published = CUI publicat direct de AAAS (autoritativ); '
'exact_norm = match exact pe firms.normalize_company_name; '
'trgm_unique = match trigram unic peste 0.85.';
-- ── 2. Scrape log ─────────────────────────────────────────────────────────
-- One row per scraper run, following the anre.scrape_log convention.
CREATE TABLE IF NOT EXISTS aaas.scrape_log (
    id            BIGSERIAL   PRIMARY KEY,
    -- Which scraper ran: 'portfolio' | 'vanzari_actiuni' | 'vanzari_creante'
    scraper       TEXT        NOT NULL,
    source_url    TEXT        NOT NULL,
    -- Per-run row counters and wall-clock duration.
    rows_seen     INTEGER     NOT NULL DEFAULT 0,
    rows_inserted INTEGER     NOT NULL DEFAULT 0,
    rows_updated  INTEGER     NOT NULL DEFAULT 0,
    rows_skipped  INTEGER     NOT NULL DEFAULT 0,
    duration_ms   INTEGER     NOT NULL DEFAULT 0,
    -- started_at must be supplied by the caller; finished_at defaults to
    -- the time the row is inserted.
    started_at    TIMESTAMPTZ NOT NULL,
    finished_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    error         TEXT
);

-- Newest runs first.
CREATE INDEX IF NOT EXISTS idx_aaas_scrape_log_started
    ON aaas.scrape_log (started_at DESC);
-- ── 3. Materialized view: per-CUI rollup for joinability ──────────────────
-- aaas.firme already holds one row per cui, so today this rollup is trivial.
-- It exists to keep the join API uniform with the other per-CUI views
-- (anre.mv_licente_per_cui / regas / etc.) once more AAAS sources land.
-- Refresh with:
--   REFRESH MATERIALIZED VIEW CONCURRENTLY aaas.mv_per_cui;
CREATE MATERIALIZED VIEW IF NOT EXISTS aaas.mv_per_cui AS
SELECT
    f.cui,
    array_agg(DISTINCT f.aaas_status) AS statusuri,
    MAX(f.state_share_pct)            AS max_state_share_pct,
    SUM(f.debt_to_state_lei)          AS total_debt_to_state_lei,
    MAX(f.fetched_at)                 AS last_seen_at,
    COUNT(*)                          AS rows_count
FROM aaas.firme AS f
GROUP BY f.cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_aaas_mv_per_cui
    ON aaas.mv_per_cui (cui);

COMMENT ON MATERIALIZED VIEW aaas.mv_per_cui
    IS 'Rollup AAAS per CUI. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY aaas.mv_per_cui.';
+202
View File
@@ -0,0 +1,202 @@
-- 033_cnsc.sql
-- CNSC — Consiliul Național de Soluționare a Contestațiilor.
-- Independent administrative-jurisdictional body that resolves
-- contestations against SEAP procurement procedures (Law 101/2016).
--
-- Source investigated 2026-05-10:
-- http://portal.cnsc.ro/decizii.html
-- → ASP.NET WebForms front, but data is loaded async via
-- POST http://portal.cnsc.ro/Default.aspx/CallWebMethod
-- payload: {"sender":"67fb8141-f456-4276-b51a-baca731f92ca",
-- "methodName":"get",
-- "senderParams":"a=search&reg:registrationDate=-&page=N",
-- "isBuletin":"0"}
-- Header: Referer must match the page query — server reads page from it.
-- Cookies: ASP.NET_SessionId required; obtained by GET /decizii.html first.
-- → Returns JSON {"d":"<html>...</html>"} with a <table> inside.
-- → 50 rows per page × 617 pages ≈ 30,850 decisions.
-- → Page 1 has 2026 decisions; last page (617) lands in 2024.
-- Sort order is by decision number DESC (panel-grouped).
--
-- Column layout in the listing (already structured — no PDF parse needed
-- to get 80% of the value):
-- 1. Numar decizie → decision_no
-- 2. Nume Contestator → contestator_name_raw / contestator_names (sometimes multiple)
-- 3. Nr. Inregistrare CNSC → registration_no_cnsc
-- 4. Denumire Autoritate → authority_name
-- 5. CUI Contestator → contestator_cui_raw / contestator_cuis ← linkable to firms.entities
-- 6. CUI Autoritate Contractantă → authority_cui_raw / authority_cuis ← linkable to seap.announcements.authority_cui
-- 7. An → decision_year
-- 8. Dată Inregistrare → registration_date
-- 9. Download → pdf_docuid_b64 (b64 docUID for sivadoc/download.aspx)
--
-- IMPORTANT: the listing does NOT include the SEAP procedure_ref (CN######) —
-- that lives only inside the PDF text. Stage 2 (PDF text extraction with
-- pdftotext + regex for "CN[0-9]{6,}|SCN[0-9]+|ADV[0-9]+") can recover the
-- SEAP ref for ~80% of decisions; estimate 15-25h to design+QA the parser
-- across the full 30K corpus.
--
-- Decision_type is also PDF-only: the listing shows when the contestation
-- was REGISTERED, not the outcome. PDF parsing is required to recover
-- 'admis' / 'respins' / 'admis în parte' / 'redirecționat' / 'arhivat'.
--
-- Cross-source value (live, even at Stage 1):
-- cnsc.decizii.authority_cui × seap.announcements.authority_cui
-- = "Authorities with most contestations filed against them" (procedural risk score)
-- cnsc.decizii.contestator_cui × seap.announcements.supplier_cui
-- = "Suppliers most active in contesting losses" (litigious-bidder profile)
-- cnsc.decizii.contestator_cui × firms.entities + financials
-- = "Who challenges the most? Are they real bidders or vexatious filers?"
--
-- After Stage 2 PDF parse:
-- cnsc.decizii × seap.announcements ON seap_procedure_ref = ref_number
-- = full contestation lifecycle: tender → contestation → CNSC outcome → award
-- GROUP BY authority_cui WHERE decision_type='admis' / total
-- = "Authorities most likely to lose at CNSC" — strong signal of flawed
-- procedure design. THIS IS THE KILLER QUERY.
CREATE SCHEMA IF NOT EXISTS cnsc;
-- ── 1. Decizii — one row per CNSC decision ─────────────────────────────────
-- Surrogate PK = id. The natural key is (decision_no, decision_year), enforced
-- by the UNIQUE constraint cnsc_decizii_pk_natural. The CNSC numbering resets
-- every year, so (1234, 2024) ≠ (1234, 2025). docUID is unique-by-doc but not
-- all rows have one in old data, so it is not used as a key.
CREATE TABLE IF NOT EXISTS cnsc.decizii (
id bigserial PRIMARY KEY,
decision_no integer NOT NULL, -- 1445
decision_year smallint NOT NULL, -- 2026
registration_no_cnsc text, -- "10549, 20389, 20395" — comma-separated when multiple
registration_date date, -- 17.02.2026 → 2026-02-17
-- Contestator (the bidder who filed the complaint)
contestator_name_raw text, -- "RAC CONSTRUCT MORENI SRL , RAC CONSTRUCT MORENI SRL, URBIO DOWNSTREAM SRL"
contestator_names text[], -- split + trimmed
contestator_cui_raw text, -- "RO18035010" or "RO18035010;4663448" or empty
contestator_cuis text[], -- normalized digits only, duplicates collapsed
-- Contracting authority (the public buyer being contested)
authority_name text,
authority_cui_raw text, -- "4495140;" or "16054368" or empty
authority_cuis text[], -- normalized digits only, duplicates collapsed
-- PDF reference (download URL is built from docuid_b64 + filename_b64)
pdf_filename text, -- "Decizie_1445.pdf"
pdf_docuid_b64 text, -- "Mzg4NThkZGQtY2JkMS00ZDg3LTlhY2UtY2ZlMTBlYzAwM2Y0"
pdf_url text, -- materialized: full http://portal.cnsc.ro/sivadoc/download.aspx?...
-- Stage-2 (PDF parse) fields — NULL until pdftotext+regex pass runs
-- Mostly populated post-hoc; kept here so the schema stays single-table.
seap_procedure_ref text, -- "CN1234567" / "ADV……" / "SCN……" — joinable to seap.announcements.ref_number
decision_type text, -- 'admis' | 'admis_in_parte' | 'respins' | 'redirectionat' | 'arhivat' | 'fond' | NULL
decision_date date, -- date the decision was issued (different from registration_date)
decision_summary text, -- short extracted summary
pdf_text_sha1 text, -- sha1 of pdftotext output → idempotent re-parse
pdf_parsed_at timestamptz,
-- Provenance
source_page integer, -- which listing page we found this on (debug)
fetched_at timestamptz NOT NULL DEFAULT now(),
-- Natural key; despite the "pk" in its name this is a UNIQUE constraint.
CONSTRAINT cnsc_decizii_pk_natural UNIQUE (decision_no, decision_year)
);
-- GIN indexes on the CUI arrays, for array containment / overlap lookups.
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_authority_cuis
ON cnsc.decizii USING gin (authority_cuis);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_contestator_cuis
ON cnsc.decizii USING gin (contestator_cuis);
-- Partial indexes: the Stage-2 columns are NULL until the PDF parse has run.
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_seap_ref
ON cnsc.decizii (seap_procedure_ref) WHERE seap_procedure_ref IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_decision_type
ON cnsc.decizii (decision_type) WHERE decision_type IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_year
ON cnsc.decizii (decision_year DESC);
CREATE INDEX IF NOT EXISTS idx_cnsc_decizii_reg_date
ON cnsc.decizii (registration_date DESC NULLS LAST);
-- Catalog comments. Adjacent string literals separated by a newline are
-- concatenated by PostgreSQL into a single comment string.
COMMENT ON TABLE cnsc.decizii IS
'Decizii CNSC — contestații pe proceduri SEAP. PK natural (decision_no, decision_year). '
'Stage 1: scrape listing din portal.cnsc.ro/decizii.html (50/page × 617 pages ≈ 30K rows). '
'Stage 2 (TODO): pdftotext pe PDF-ul referit → seap_procedure_ref + decision_type.';
COMMENT ON COLUMN cnsc.decizii.decision_no IS
'Numărul deciziei CNSC. Resetat anual — întotdeauna unic doar împreună cu decision_year.';
COMMENT ON COLUMN cnsc.decizii.registration_no_cnsc IS
'Numărul/numerele de înregistrare a contestației la CNSC. Poate fi listă comma-separată '
'când o decizie soluționează mai multe contestații (ex. "10549, 20389, 20395").';
COMMENT ON COLUMN cnsc.decizii.contestator_cuis IS
'CUIs cifre-only ale contestatorilor (mai mulți când o asociere atacă). '
'Joinabil cu firms.entities.cui sau seap.announcements.supplier_cui.';
COMMENT ON COLUMN cnsc.decizii.authority_cuis IS
'CUIs cifre-only ale autorităților contractante. Joinabil cu seap.announcements.authority_cui.';
COMMENT ON COLUMN cnsc.decizii.pdf_url IS
'URL complet sivadoc/download.aspx?docUID=…&filename=…&action=inline (b64 in query).';
COMMENT ON COLUMN cnsc.decizii.seap_procedure_ref IS
'Referința procedurii SEAP extrasă din PDF (CN/SCN/ADV/RFQ + cifre). NULL până la Stage-2 PDF-parse.';
COMMENT ON COLUMN cnsc.decizii.decision_type IS
'Tipul deciziei extras din textul PDF: admis | admis_in_parte | respins | redirectionat | arhivat | fond. NULL până la Stage-2.';
-- ── 2. Scrape log ─────────────────────────────────────────────────────────
-- One row per scraper run, following the anre/aaas scrape_log convention.
-- Unlike those tables it records a listing page range, not a source URL.
CREATE TABLE IF NOT EXISTS cnsc.scrape_log (
    id            BIGSERIAL   PRIMARY KEY,
    -- Which scraper ran: 'listing' | 'pdf_parse'
    scraper       TEXT        NOT NULL,
    page_from     INTEGER,
    page_to       INTEGER,
    -- Per-run row counters and wall-clock duration.
    rows_seen     INTEGER     NOT NULL DEFAULT 0,
    rows_inserted INTEGER     NOT NULL DEFAULT 0,
    rows_updated  INTEGER     NOT NULL DEFAULT 0,
    rows_skipped  INTEGER     NOT NULL DEFAULT 0,
    duration_ms   INTEGER     NOT NULL DEFAULT 0,
    -- started_at must be supplied by the caller; finished_at defaults to
    -- the time the row is inserted.
    started_at    TIMESTAMPTZ NOT NULL,
    finished_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    error         TEXT
);

-- Newest runs first.
CREATE INDEX IF NOT EXISTS idx_cnsc_scrape_log_started
    ON cnsc.scrape_log (started_at DESC);
-- ── 3. Materialized view: per-CUI rollup (authority side) ─────────────────
-- Used by the killer query "authorities most likely to lose at CNSC".
-- Each decision contributes one row per element of authority_cuis, so a
-- decision naming several authorities is counted once for each of them.
-- decision_type rollup is meaningful only after Stage-2 PDF parse is done;
-- until then admis_count / respins_count are 0 and contestation_count is
-- the useful field.
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_authority_cui AS
SELECT
    u.authority_cui AS cui,
    COUNT(*) AS contestation_count,
    COUNT(*) FILTER (WHERE d.decision_type = 'admis') AS admis_count,
    COUNT(*) FILTER (WHERE d.decision_type = 'admis_in_parte') AS admis_in_parte_count,
    COUNT(*) FILTER (WHERE d.decision_type = 'respins') AS respins_count,
    COUNT(*) FILTER (WHERE d.decision_type IS NOT NULL) AS resolved_count,
    MIN(d.registration_date) AS first_contestation_date,
    MAX(d.registration_date) AS last_contestation_date
FROM cnsc.decizii AS d
-- Explicit lateral join replaces the former comma join: unnest() expands the
-- CUI array of each decision row into one row per CUI.
CROSS JOIN LATERAL unnest(d.authority_cuis) AS u (authority_cui)
-- Drop NULL / empty array elements so they do not form their own group.
WHERE u.authority_cui IS NOT NULL
  AND u.authority_cui <> ''
GROUP BY u.authority_cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_authority_cui
    ON cnsc.mv_per_authority_cui(cui);

COMMENT ON MATERIALIZED VIEW cnsc.mv_per_authority_cui IS
'Rollup CNSC per autoritate contractantă (CUI). Refresh: '
'REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui.';
-- ── 4. Materialized view: per-CUI rollup (contestator side) ───────────────
-- One output row per contestator CUI. Each decision contributes one row per
-- element of contestator_cuis, so a joint filing is counted once for every
-- listed contestator. The won_* / lost_* counters depend on decision_type,
-- which the table definition documents as NULL until the Stage-2 PDF parse.
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_contestator_cui;
CREATE MATERIALIZED VIEW IF NOT EXISTS cnsc.mv_per_contestator_cui AS
SELECT
    u.contestator_cui AS cui,
    COUNT(*) AS contestations_filed,
    COUNT(*) FILTER (WHERE d.decision_type = 'admis') AS won_admis,
    COUNT(*) FILTER (WHERE d.decision_type = 'admis_in_parte') AS won_partial,
    COUNT(*) FILTER (WHERE d.decision_type = 'respins') AS lost_respins,
    COUNT(*) FILTER (WHERE d.decision_type IS NOT NULL) AS resolved_count,
    MIN(d.registration_date) AS first_contestation_date,
    MAX(d.registration_date) AS last_contestation_date
FROM cnsc.decizii AS d
-- Explicit lateral join replaces the former comma join: unnest() expands the
-- CUI array of each decision row into one row per CUI.
CROSS JOIN LATERAL unnest(d.contestator_cuis) AS u (contestator_cui)
-- Drop NULL / empty array elements so they do not form their own group.
WHERE u.contestator_cui IS NOT NULL
  AND u.contestator_cui <> ''
GROUP BY u.contestator_cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_cnsc_mv_per_contestator_cui
    ON cnsc.mv_per_contestator_cui(cui);

COMMENT ON MATERIALIZED VIEW cnsc.mv_per_contestator_cui IS
'Rollup CNSC per contestator (CUI). Cine atacă cel mai mult, cu ce rată de succes. '
'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_contestator_cui.';
+123
View File
@@ -0,0 +1,123 @@
-- 034_asf.sql
-- ASF — Autoritatea de Supraveghere Financiară.
-- Public registries of authorized financial entities (insurers, brokers, pension
-- funds, asset managers, intermediaries) scraped from data.asfromania.ro.
--
-- Sources (all return JSON{raspuns:HTML, status:100} via POST cautare):
-- 1. /scr/ra/cautare?l=ro (Registrul asigurătorilor + intermediarilor)
-- sectiune=1 tipCompanie=0 → Societăți de asigurare - companii active
-- sectiune=2 tipCompanie=0 → Societăți de asigurare - companii radiate
-- sectiune=1 tipCompanie=1 → Intermediari principali - companii active
-- sectiune=2 tipCompanie=1 → Intermediari principali - companii radiate
-- Fields per panel: register_no (RA-XXX/RBK-XXX), LEI, CUI, RC code,
-- authorization no/date, registration date, radiation date, type, legal form,
-- address, phone, fax, observations, authorized classes (general/life),
-- executives. Total: ~768 insurers + ~801 brokers ≈ 1.5K entities.
--
-- 2. /scr/ra/cautare endpoint accepts free-text 'termen' (≥4 chars). Search
-- hits denumire, CUI, adresă, județ, classes. NO captcha required when
-- 'g-recaptcha-response' field is OMITTED from the POST body. (When sent
-- with any non-empty value the server tries to verify and returns
-- "Verificare captcha eșuată".)
--
-- 3. Pension funds + AIFM/UCITS register pages exist on asfromania.ro/ro/a/...
-- but most are F5-WAF-protected from non-browser clients. We start with the
-- ra portal which has cleanest data; document handoff for additional
-- registers in ASF-PLAN.md.
--
-- Cross-source value: asf.entitati.cui (extracted directly from response, no
-- fuzzy match needed) × seap.announcements.supplier_cui = "ASF-licensed firms
-- with state contracts". Red-flag: insurance firm wins SEAP contract for state
-- insurance services but has been radiated by ASF; broker active in SEAP but
-- with suspended/withdrawn ASF authorization.
CREATE SCHEMA IF NOT EXISTS asf;
-- ── 1. Authorized entities (insurers, brokers, pension funds, AIFM, UCITS) ──
-- One row per distinct ASF register entry. Every entity has a register_no
-- (RA-NNN for insurers, RBK-NNN for brokers, etc.) which is unique within its
-- register_type — enforced by UNIQUE (register_type, register_no) below.
CREATE TABLE IF NOT EXISTS asf.entitati (
id bigserial PRIMARY KEY,
register_type text NOT NULL, -- 'asigurator' | 'broker' | 'fond_pensii' | 'aifm' | 'ucits' | 'intermediar_secundar'
section_status text NOT NULL, -- 'activ' | 'radiat' (mirrors source sectiune=1/2 split)
register_no text NOT NULL, -- e.g. "RA-057", "RBK-123" (unique within register_type)
name text NOT NULL, -- raw "Denumire"
name_normalized text, -- firms.normalize_company_name(name) — for trigram fallback
cui text, -- "Cod unic de identificare (CUI)"
cod_rc text, -- "Cod unic RC" (e.g. J40/2226/2006)
cod_lei text, -- LEI 20-char
nr_autorizatie text, -- "Număr autorizație" (e.g. 114.146)
data_autorizare date, -- "Dată autorizare"
data_inmatriculare date, -- "Dată înmatriculare"
data_radiere date, -- "Dată radiere" (NULL when active)
tip_companie text, -- "Tip companie" (Societate de asigurare / Intermediar principal / etc.)
forma_juridica text, -- "Formă juridică"
adresa text, -- "Adresă"
telefon text,
fax text,
email text,
web text,
observatii text, -- free-text remarks
clase_autorizate jsonb, -- {"asigurari_generale":[...], "asigurari_viata":[...]}
conducere jsonb, -- [{"nume":"X","functie":"Y","din":"DD.MM.YYYY"}]
raw_html text, -- raw panel HTML for traceability
fetched_at timestamptz NOT NULL DEFAULT now(),
UNIQUE (register_type, register_no)
);
-- Partial index: only entities for which a CUI is present.
CREATE INDEX IF NOT EXISTS idx_asf_entitati_cui ON asf.entitati(cui) WHERE cui IS NOT NULL;
-- Trigram index on the normalized name, for the fuzzy-match fallback.
CREATE INDEX IF NOT EXISTS idx_asf_entitati_name_norm_trgm ON asf.entitati USING gin (name_normalized gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_asf_entitati_type_status ON asf.entitati(register_type, section_status);
-- Partial index: only entities that have a radiation date.
CREATE INDEX IF NOT EXISTS idx_asf_entitati_radiere ON asf.entitati(data_radiere) WHERE data_radiere IS NOT NULL;
COMMENT ON TABLE asf.entitati IS
'ASF authorized entities — insurers, brokers, pension funds, AIFM/UCITS, intermediaries. Source: data.asfromania.ro/scr/ra (and other registers).';
COMMENT ON COLUMN asf.entitati.register_type IS
'asigurator (RA-NNN) / broker (RBK-NNN) / fond_pensii / aifm / ucits / intermediar_secundar';
COMMENT ON COLUMN asf.entitati.section_status IS
'activ / radiat — mirrors source sectiune=1/sectiune=2 split. Active record has data_radiere=NULL.';
-- ── 2. Scrape log ───────────────────────────────────────────────────────────
-- One row per scraper run, following the anre.scrape_log convention.
CREATE TABLE IF NOT EXISTS asf.scrape_log (
    id            BIGSERIAL   PRIMARY KEY,
    -- Which scraper ran:
    -- 'asigurator_activ' / 'asigurator_radiat' / 'broker_activ' / ...
    scraper       TEXT        NOT NULL,
    source_url    TEXT        NOT NULL,
    -- Per-run row counters and wall-clock duration.
    rows_seen     INTEGER     NOT NULL DEFAULT 0,
    rows_inserted INTEGER     NOT NULL DEFAULT 0,
    rows_updated  INTEGER     NOT NULL DEFAULT 0,
    rows_skipped  INTEGER     NOT NULL DEFAULT 0,
    duration_ms   INTEGER     NOT NULL DEFAULT 0,
    -- started_at must be supplied by the caller; finished_at defaults to
    -- the time the row is inserted.
    started_at    TIMESTAMPTZ NOT NULL,
    finished_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    error         TEXT
);

-- Newest runs first.
CREATE INDEX IF NOT EXISTS idx_asf_scrape_log_started
    ON asf.scrape_log (started_at DESC);
-- ── 3. Materialized view: per-CUI ASF rollup ────────────────────────────────
-- One row per CUI across all ASF register entries that carry a CUI. Meant to
-- be joined with seap.announcements.supplier_cui to detect financial firms
-- holding state contracts.
CREATE MATERIALIZED VIEW IF NOT EXISTS asf.mv_entitati_per_cui AS
SELECT
    e.cui,
    COUNT(*)                                                 AS nr_total,
    -- Entry counts per register type.
    COUNT(*) FILTER (WHERE e.register_type = 'asigurator')   AS nr_asigurator,
    COUNT(*) FILTER (WHERE e.register_type = 'broker')       AS nr_broker,
    COUNT(*) FILTER (WHERE e.register_type = 'fond_pensii')  AS nr_fond_pensii,
    COUNT(*) FILTER (WHERE e.register_type = 'aifm')         AS nr_aifm,
    COUNT(*) FILTER (WHERE e.register_type = 'ucits')        AS nr_ucits,
    -- Entry counts per source section (active vs. radiated).
    COUNT(*) FILTER (WHERE e.section_status = 'activ')       AS nr_active,
    COUNT(*) FILTER (WHERE e.section_status = 'radiat')      AS nr_radiate,
    array_agg(DISTINCT e.register_type)                      AS register_types,
    array_agg(DISTINCT e.register_no ORDER BY e.register_no) AS register_numbers,
    -- Earliest authorization and latest radiation date seen for this CUI.
    MIN(e.data_autorizare)                                   AS prima_autorizare,
    MAX(e.data_radiere)                                      AS ultima_radiere
FROM asf.entitati AS e
WHERE e.cui IS NOT NULL
GROUP BY e.cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_asf_mv_entitati_per_cui
    ON asf.mv_entitati_per_cui (cui);

COMMENT ON MATERIALIZED VIEW asf.mv_entitati_per_cui
    IS 'Rollup of ASF entities per CUI. Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY asf.mv_entitati_per_cui';
@@ -0,0 +1,120 @@
-- 035_curteacont.sql
-- Curtea de Conturi a României — Rapoarte de audit financiar / conformitate /
-- performanță / control / follow-up.
--
-- Source: https://www.curteadeconturi.ro/rapoarte-audit/{category}
-- Categories scraped:
-- - rapoarte-audit-financiar (~1,890 reports, 127 listing pages × 15)
-- - rapoarte-conformitate (~2,580 reports, 173 pages × 15)
-- - rapoarte-audit-performanta (~135 reports, 9 pages × 15)
-- Approximate total: ~4,600 reports, growing weekly with new audits.
--
-- Detail page exposes a single PDF download link of the form
-- `/rapoarte-audit/downloads/{integer_id}` (verified IDs: 4078, 7335, 7854,
-- 10653, 12418, 13832, 14183 — sequential, predictable).
--
-- Stage 1 (this file + scrape-curteacont.ts):
-- - Walks listing pages, harvests slug URLs + titles + publication dates +
-- audit_type + audited entity name (parsed from title).
-- - DOES NOT fetch detail pages or download PDFs (that is Stage 2 — see
-- CURTEACONT-PLAN.md for the 15-25h roadmap).
--
-- Stage 2 (next session):
-- - For each row with NULL pdf_url, fetch detail page → extract
-- /downloads/{id} numeric PDF ID + file size.
-- - Optionally download PDF to satra disk under /opt/vreaudigital/data/cdc/.
-- - Run pdfminer/pdftotext against first 3 pages → extract structured
-- summary, findings_count, key amounts.
-- - Fuzzy-match audited_entity_name against firms.entities.denumire (lib
-- curatat already exists at services/seap-scraper/src/matching/) → fill
-- audited_entity_cui.
--
-- PRIMARY KEY:
-- slug_id = sha1(category || '|' || slug). The numeric download ID is NULL
-- until Stage 2 resolves it from the detail page. We keep it nullable + add
-- a separate UNIQUE constraint when discovered.
--
-- Cross-source value (recipe drafts in CURTEACONT-PLAN.md):
-- 1. "Autorități audited de N ori în 5 ani" — repeat-audit risk score.
-- 2. "Spitale audited POST SEAP award" — paralelă cu CNAS cross-source.
-- 3. "Rapoarte follow-up" — semnal că auditul anterior n-a fost remediat.
CREATE SCHEMA IF NOT EXISTS curteacont;

-- ── Audit reports ───────────────────────────────────────────────────────────
-- One row per audit report listed by Curtea de Conturi. The listing-page slug
-- is the source of truth; the numeric download_id (PDF) is filled in Stage 2.
CREATE TABLE IF NOT EXISTS curteacont.rapoarte (
    slug_id             CHAR(40)    PRIMARY KEY,             -- sha1(category|slug)
    download_id         INTEGER,                             -- /downloads/{id}, filled in Stage 2
    category            TEXT        NOT NULL,                -- 'rapoarte-audit-financiar' | 'rapoarte-conformitate' | 'rapoarte-audit-performanta'
    slug                TEXT        NOT NULL,                -- last URL segment, unique within category
    detail_url          TEXT        NOT NULL,                -- absolute URL to detail page
    title               TEXT        NOT NULL,                -- raw title from listing
    audit_type          TEXT,                                -- 'financiar' | 'conformitate' | 'performanta' | 'control' | 'follow-up'
    audit_year          SMALLINT,                            -- year the audit covers (e.g. 2024 in "pentru anul 2024")
    doc_number          TEXT,                                -- "nr.27500" → "27500"
    doc_date            DATE,                                -- "07.04.2026" parsed
    audited_entity_name TEXT,                                -- raw, extracted from the title after the last comma
    audited_entity_cui  TEXT,                                -- filled in Stage 2 via fuzzy match
    publication_date    DATE,                                -- from <time datetime="..."> on listing card
    pdf_url             TEXT,                                -- /rapoarte-audit/downloads/{id} — Stage 2
    pdf_size_bytes      BIGINT,                              -- parsed from "(X,YZ MB)" — Stage 2
    pdf_path            TEXT,                                -- if mirrored to satra disk — Stage 2 optional
    summary             TEXT,                                -- first-page abstract — Stage 2 PDF parse
    findings_count      INTEGER,                             -- count of "constatări" (findings) — Stage 2 PDF parse
    fetched_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
    parsed_at           TIMESTAMPTZ                          -- set when Stage 2 PDF parse completes
);

-- Uniqueness: (category, slug) always; download_id only once it is known.
CREATE UNIQUE INDEX IF NOT EXISTS rapoarte_category_slug_uniq
    ON curteacont.rapoarte (category, slug);
CREATE UNIQUE INDEX IF NOT EXISTS rapoarte_download_id_uniq
    ON curteacont.rapoarte (download_id)
    WHERE download_id IS NOT NULL;

-- Lookup / ordering indexes.
CREATE INDEX IF NOT EXISTS rapoarte_audit_year_idx
    ON curteacont.rapoarte (audit_year DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS rapoarte_audit_type_idx
    ON curteacont.rapoarte (audit_type);
CREATE INDEX IF NOT EXISTS rapoarte_pub_date_idx
    ON curteacont.rapoarte (publication_date DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS rapoarte_audited_cui_idx
    ON curteacont.rapoarte (audited_entity_cui)
    WHERE audited_entity_cui IS NOT NULL;

-- Trigram index for fuzzy entity-name matching (Stage 2 needs it to resolve
-- CUIs). gin_trgm_ops comes from pg_trgm, so the extension is ensured first.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX IF NOT EXISTS rapoarte_entity_trgm_idx
    ON curteacont.rapoarte USING gin (audited_entity_name gin_trgm_ops)
    WHERE audited_entity_name IS NOT NULL;
-- ── Scrape run log ──────────────────────────────────────────────────────────
-- One row per CLI invocation.
-- NOTE(review): the original note says inserts are idempotent on
-- (started_at, category), but no unique constraint on that pair is declared
-- here — confirm the scraper enforces it, or add the constraint.
CREATE TABLE IF NOT EXISTS curteacont.scrape_runs (
    id            BIGSERIAL   PRIMARY KEY,
    category      TEXT        NOT NULL,
    started_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    finished_at   TIMESTAMPTZ,
    pages_visited INTEGER     NOT NULL DEFAULT 0,
    rows_inserted INTEGER     NOT NULL DEFAULT 0,
    rows_updated  INTEGER     NOT NULL DEFAULT 0,
    rows_skipped  INTEGER     NOT NULL DEFAULT 0,
    last_error    TEXT,
    notes         TEXT
);

-- Newest-first ordering on run start time.
CREATE INDEX IF NOT EXISTS scrape_runs_started_idx
    ON curteacont.scrape_runs (started_at DESC);

COMMENT ON SCHEMA curteacont IS
    'Curtea de Conturi audit reports (https://www.curteadeconturi.ro/rapoarte-audit). Stage 1 = URL+metadata harvest from listing pages. Stage 2 = detail-page resolve + PDF parse + CUI fuzzy match.';

COMMENT ON TABLE curteacont.rapoarte IS
    'One row per audit report. PK is sha1(category|slug). Numeric download_id and PDF metadata filled in Stage 2 (see CURTEACONT-PLAN.md).';
+235
View File
@@ -0,0 +1,235 @@
-- 036_apia.sql
-- APIA — Agenția de Plăți și Intervenție pentru Agricultură.
-- "Lista fermierilor" — annual list of farmers receiving direct payments
-- (subvenții) per UAT campaign. Covers ~each commune that publishes a list
-- via data.gov.ro (currently 2024 only; one comuna live, more on the way).
--
-- Sources investigated 2026-05-10:
-- 1. data.gov.ro CKAN — only ONE published "Lista fermieri APIA" XLSX
-- lives at /api/3/action/package_show?id=lista-fermierilor-campania-apia-2024
-- (single resource: comuna Găgești, jud. Vaslui, 192 farmers).
-- The schema is per-comuna so future ingests over the same package
-- will multiply rows linearly.
-- 2. https://www.apia.org.ro/ — bot-blocked (HTTP 403 from non-browser
-- User-Agents). National-level lists exist on APIA's site but require
-- JS / browser session to retrieve. Out of scope for this pass.
-- 3. AFIR FEGA dump (fonduri.afir_plati WHERE tip_fond='FEGA', 4.29M rows
-- for 2023+2024) is the *closest* national equivalent — it contains
-- payment amounts but no SUPRAFATA (hectares) and no
-- RESPONSABIL UAT / CENTRUL APIA fields.
--
-- This schema is therefore intentionally narrow but extensible:
-- - one row per (campaign_year, name, comuna, sat) — natural composite key
-- - source_dataset_id + source_resource_id on every row → idempotent re-ingest
-- - cui populated only when the row is a legal person (SC ... SRL / PFA);
-- for natural persons (CNP-keyed in source) cui stays NULL
--
-- Cross-source value:
-- apia.fermieri.cui × fonduri.afir_plati(tip_fond='FEGA').cui
-- = "Fermier in lista APIA care apare ȘI in plățile FEGA AFIR" — sanity
-- check duplicate-receipt audit. APIA list shows hectares declared,
-- FEGA shows EUR plătiți; ratio EUR/ha → outliers.
-- apia.fermieri.cui × anaf.datornici.cui
-- = "Fermier (PFA/SRL) cu datorii la stat care primește subvenții APIA"
-- — direct red flag.
-- apia.fermieri.name (PF, no CUI) × ani.declaratii.persoana_name
-- = persoane cu funcții publice care primesc subvenții agricole.
-- apia.fermieri.cui × seap.announcements.supplier_cui
-- = ferme care iau și subvenții și contracte publice.
CREATE SCHEMA IF NOT EXISTS apia;

-- ── 1. Farmer list ────────────────────────────────────────────────────────
-- One row per farmer × campaign × comuna × sat. A composite uniqueness rule
-- is used because the data.gov.ro source has no national ID column (no
-- CNP/CUI per row). 'name' is the raw "NUME PRENUME" string from the
-- published XLSX.
CREATE TABLE IF NOT EXISTS apia.fermieri (
    id                 BIGSERIAL     PRIMARY KEY,
    campaign_year      SMALLINT      NOT NULL,   -- e.g. 2024; the row also carries "SUPRAFATA 2023", but campaign year is the publication year
    name               TEXT          NOT NULL,   -- raw "NUME PRENUME" or "SC ... SRL"
    name_normalized    TEXT,                     -- firms.normalize_company_name(name) — only when the name looks like a legal person
    cui                TEXT,                     -- only if matched to firms.entities (legal-person rows like "SC X SRL")
    cui_match_method   TEXT,                     -- 'exact_norm' | 'trgm_unique' | NULL
    cui_match_score    NUMERIC(4,3),
    is_legal_person    BOOLEAN,                  -- guessed from name shape (SC, SRL, PFA, II, IF, SA prefixes/suffixes)
    judet              TEXT,                     -- enriched via centru_apia mapping (Găgești → VS Vaslui)
    comuna_oras        TEXT,                     -- raw "COMUNA/ORAS" cell
    sat                TEXT,
    centru_apia        TEXT,                     -- "CENTRUL APIA" (e.g. MURGENI)
    responsabil_uat    TEXT,                     -- "RESPONSABIL UAT 2024" (the UAT employee, not the farmer)
    suprafata_ha       NUMERIC(12,4),            -- "SUPRAFATA 2023" hectares, decimals allowed (e.g. 1.04, 12.45)
    source_dataset_id  TEXT          NOT NULL,   -- CKAN package_id, e.g. 'lista-fermierilor-campania-apia-2024'
    source_resource_id TEXT          NOT NULL,   -- CKAN resource_id (UUID)
    source_url         TEXT          NOT NULL,   -- direct XLSX download URL
    fetched_at         TIMESTAMPTZ   NOT NULL DEFAULT now(),
    -- NULLS NOT DISTINCT: a NULL sat counts as a single value, so rows where
    -- the source omits sat cannot be duplicated.
    UNIQUE NULLS NOT DISTINCT (campaign_year, name, comuna_oras, sat)
);

CREATE INDEX IF NOT EXISTS idx_apia_fermieri_cui
    ON apia.fermieri (cui)
    WHERE cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_year
    ON apia.fermieri (campaign_year);
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_judet
    ON apia.fermieri (judet);
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_comuna
    ON apia.fermieri (comuna_oras);
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_centru
    ON apia.fermieri (centru_apia);
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_resource
    ON apia.fermieri (source_resource_id);
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_suprafata
    ON apia.fermieri (suprafata_ha DESC NULLS LAST);
-- Trigram index on the normalized name (gin_trgm_ops comes from pg_trgm).
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_name_trgm
    ON apia.fermieri USING gin (name_normalized gin_trgm_ops);

COMMENT ON TABLE apia.fermieri IS
    'Lista fermierilor publicată de APIA prin UAT-uri pe data.gov.ro. '
    'Sursă primară: CKAN package "lista-fermierilor-campania-apia-2024". '
    'Cross-source cu fonduri.afir_plati (FEGA) și anaf.datornici.';

COMMENT ON COLUMN apia.fermieri.suprafata_ha IS
    'Hectare declarate — coloana "SUPRAFATA 2023" (precedent campaign).';

COMMENT ON COLUMN apia.fermieri.is_legal_person IS
    'true = nume conține markeri "SC/SRL/SA/PFA/II/IF/CABINET" → potențial match firms.entities.';

COMMENT ON COLUMN apia.fermieri.cui_match_method IS
    'exact_norm = match exact pe firms.normalize_company_name; '
    'trgm_unique = match trigram unic peste 0.85; NULL = nepotrivit (probabil PF).';
-- ── 1b. Staging table (used by importer for COPY → INSERT pipeline) ─────
-- Every column is untyped TEXT with no constraints; column names mirror the
-- source-derived columns of apia.fermieri.
CREATE TABLE IF NOT EXISTS apia.staging_fermieri (
    campaign_year      TEXT,
    name               TEXT,
    comuna_oras        TEXT,
    sat                TEXT,
    centru_apia        TEXT,
    responsabil_uat    TEXT,
    suprafata_ha       TEXT,
    source_dataset_id  TEXT,
    source_resource_id TEXT,
    source_url         TEXT
);

COMMENT ON TABLE apia.staging_fermieri IS
    'Tabel de staging pentru importul XLSX→COPY. TRUNCATE între import-uri.';
-- ── 2. Scrape log ─────────────────────────────────────────────────────────
-- One row per CKAN-resource ingest; serves as a "what was imported when"
-- history.
CREATE TABLE IF NOT EXISTS apia.scrape_log (
    id                 BIGSERIAL   PRIMARY KEY,
    source_dataset_id  TEXT        NOT NULL,
    source_resource_id TEXT        NOT NULL,
    source_url         TEXT        NOT NULL,
    campaign_year      SMALLINT    NOT NULL,
    rows_seen          INTEGER     NOT NULL DEFAULT 0,
    rows_inserted      INTEGER     NOT NULL DEFAULT 0,
    rows_updated       INTEGER     NOT NULL DEFAULT 0,
    rows_matched_cui   INTEGER     NOT NULL DEFAULT 0,
    duration_ms        INTEGER     NOT NULL DEFAULT 0,
    -- started_at has no default and must be supplied by the writer;
    -- finished_at defaults to the insert time.
    started_at         TIMESTAMPTZ NOT NULL,
    finished_at        TIMESTAMPTZ NOT NULL DEFAULT now(),
    error              TEXT
);

CREATE INDEX IF NOT EXISTS idx_apia_scrape_log_started
    ON apia.scrape_log (started_at DESC);
CREATE INDEX IF NOT EXISTS idx_apia_scrape_log_resource
    ON apia.scrape_log (source_resource_id);
-- ── 3. Materialized view: per-CUI rollup ─────────────────────────────────
-- One row per non-NULL CUI in apia.fermieri.
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY apia.mv_per_cui;
CREATE MATERIALIZED VIEW IF NOT EXISTS apia.mv_per_cui AS
SELECT
    f.cui,
    array_agg(DISTINCT f.campaign_year ORDER BY f.campaign_year)  AS campaign_years,
    -- NULL judet values are excluded from the array.
    array_agg(DISTINCT f.judet) FILTER (WHERE f.judet IS NOT NULL) AS judete,
    sum(f.suprafata_ha)                                           AS total_suprafata_ha,
    count(*)                                                      AS rows_count,
    max(f.fetched_at)                                             AS last_seen_at
FROM apia.fermieri AS f
WHERE f.cui IS NOT NULL
GROUP BY f.cui;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_apia_mv_per_cui
    ON apia.mv_per_cui (cui);

COMMENT ON MATERIALIZED VIEW apia.mv_per_cui IS
    'Rollup APIA per CUI (doar PJ-uri cu match). '
    'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY apia.mv_per_cui.';
-- ── 4. CUI matcher (call after each ingest) ──────────────────────────────
-- Populates apia.fermieri.cui by joining name_normalized against
-- firms.entities. Conservative: a row is resolved only when exactly one
-- candidate is found.
--
-- Passes, in order:
--   1. Flag candidate legal persons (is_legal_person = TRUE) by name shape
--      and compute name_normalized for them. Only rows whose
--      is_legal_person is still NULL are examined.
--   2. Exact match on name_normalized; applied only when all candidates
--      share a single CUI.
--   3. Trigram fallback (similarity >= 0.85) for rows still without a CUI;
--      applied only when there is exactly one candidate row.
--
-- Returns a single row:
--   matched_count   = rows resolved by the exact pass
--   ambiguous_count = rows resolved by the trigram pass. The column name is
--                     kept for backward compatibility; it is NOT a count of
--                     ambiguous (skipped) rows.
CREATE OR REPLACE FUNCTION apia.match_cui()
RETURNS TABLE(matched_count bigint, ambiguous_count bigint) AS $$
DECLARE
    v_exact bigint := 0;  -- rows resolved by the exact-norm pass
    v_trgm  bigint := 0;  -- rows resolved by the trigram pass
BEGIN
    -- Heuristic: row is a candidate legal person if the name contains
    -- SRL/SA/PFA/II/IF/SC/CABINET/COOPERATIVA/COOP or a dotted abbreviation.
    -- Plain tokens need word boundaries on both sides (\m ... \M). Dotted
    -- forms end in '.', after which \M can never match (\M requires the
    -- preceding character to be a word character), so they are anchored
    -- with \m only and must end with their final dot.
    UPDATE apia.fermieri f
       SET is_legal_person = TRUE,
           name_normalized = firms.normalize_company_name(f.name)
     WHERE f.is_legal_person IS NULL
       AND f.name ~* '\m(?:(?:SRL|SA|PFA|II|IF|SC|CABINET|COOPERATIVA|COOP)\M|(?:S\.R\.L|S\.A|P\.F\.A|I\.I|I\.F|S\.C)\.)';

    -- Exact-norm match: accept only when every candidate has the same CUI.
    WITH cands AS (
        SELECT f.id, e.cui
          FROM apia.fermieri f
          JOIN firms.entities e
            ON e.name_normalized = f.name_normalized
         WHERE f.cui IS NULL
           AND f.is_legal_person = TRUE
           AND f.name_normalized IS NOT NULL
    ),
    uniq AS (
        SELECT id, MIN(cui) AS cui
          FROM cands
         GROUP BY id
        HAVING COUNT(DISTINCT cui) = 1
    ),
    upd AS (
        UPDATE apia.fermieri f
           SET cui = u.cui,
               cui_match_method = 'exact_norm',
               cui_match_score = 1.0
          FROM uniq u
         WHERE f.id = u.id
        RETURNING f.id
    )
    SELECT COUNT(*) INTO v_exact FROM upd;

    -- Trigram fallback for legal persons still unmatched (threshold 0.85,
    -- and the candidate must be unique). Rows resolved by the exact pass
    -- above now have a non-NULL cui and are excluded.
    WITH cands AS (
        SELECT f.id, e.cui,
               similarity(e.name_normalized, f.name_normalized) AS sim
          FROM apia.fermieri f
          JOIN firms.entities e
            ON e.name_normalized % f.name_normalized
         WHERE f.cui IS NULL
           AND f.is_legal_person = TRUE
           AND f.name_normalized IS NOT NULL
           AND similarity(e.name_normalized, f.name_normalized) >= 0.85
    ),
    ranked AS (
        SELECT id, cui, sim,
               COUNT(*) OVER (PARTITION BY id) AS n_cands
          FROM cands
    ),
    uniq AS (
        -- n_cands = 1 already guarantees one row per id.
        SELECT id, cui, sim
          FROM ranked
         WHERE n_cands = 1
    ),
    upd AS (
        UPDATE apia.fermieri f
           SET cui = u.cui,
               cui_match_method = 'trgm_unique',
               cui_match_score = u.sim
          FROM uniq u
         WHERE f.id = u.id
        RETURNING f.id
    )
    SELECT COUNT(*) INTO v_trgm FROM upd;

    matched_count := v_exact;
    ambiguous_count := v_trgm;
    RETURN NEXT;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION apia.match_cui() IS
    'Matchează apia.fermieri.cui prin firms.entities. Întâi exact_norm, apoi '
    'trgm fallback >=0.85 cu unicitate. Returnează (matched_count = exact_norm, ambiguous_count = trgm_unique). '
    'Doar rândurile marcate is_legal_person.';
+115
View File
@@ -0,0 +1,115 @@
-- 037_gnm.sql
-- GNM — Garda Națională de Mediu.
-- Public press releases of environmental enforcement actions scraped from
-- gnm.ro/feed/ (WordPress RSS, ~358 items / 36 pages).
--
-- Investigation summary (2026-05-10):
-- • The institution publishes only AGGREGATE statistics (per-month / per-judet
-- totals) in their monthly synthesis PDFs (sinteza_<luna>_<an>.pdf) and the
-- annual activity report (raport_activitate_<an>.pdf). NO per-firm registry
-- is published with CUIs and individual fine amounts.
-- • The only place where individual violators are named is in press releases
-- ("comunicate de presă"). Even there:
-- Most releases reference "doi operatori", "șapte operatori în patru
-- județe" without naming firms.
-- When firms are named (e.g. Petrobrazi, Vega, Lukoil refineries), the
-- individual amount is rarely broken down — they receive a collective
-- "€340,000 în ultimul an" figure.
-- CUIs are NEVER published; we must fuzzy-match on company name +
-- judet via cui_matcher (Stage B of the pipeline).
-- • data.gov.ro has 0 GNM datasets; ANPM publishes IPPC/SEVESO inventories
-- (which we ingest separately) but no fines.
--
-- Conclusion: this is a partial / sample-quality dataset. We capture every
-- press release as gnm.comunicate, then run a regex extractor to surface
-- candidate (company, fine_lei, fapta) tuples into gnm.amenzi_extrase. The
-- coverage will be ~5-15% of total GNM enforcement activity (estimated 5K
-- fines/year, of which only ~50-200 firms are named publicly per year).
--
-- The cross-source value remains: any firm publicly shamed by GNM that ALSO
-- wins SEAP construction/industrial contracts is a 1st-page scandal pattern.
-- We accept that we miss the long tail; we capture the headlines.
CREATE SCHEMA IF NOT EXISTS gnm;

-- ── 1. Press releases (one row per gnm.ro article) ─────────────────────────
-- Stores each press release published by GNM in full. Serves both as a raw
-- archive (in case interpretation rules change) and as the parent of the
-- extracted violator rows.
CREATE TABLE IF NOT EXISTS gnm.comunicate (
    id               BIGSERIAL   PRIMARY KEY,
    guid             TEXT        NOT NULL UNIQUE,        -- WordPress GUID (stable post id)
    url              TEXT        NOT NULL,
    titlu            TEXT        NOT NULL,
    publicat_la      TIMESTAMPTZ,                        -- pubDate from RSS
    autor            TEXT,                               -- dc:creator
    categorii        TEXT[],                             -- e.g. {COMUNICATE DE PRESĂ, NOUTĂȚI}
    continut_html    TEXT,                               -- raw content:encoded
    continut_text    TEXT,                               -- HTML-stripped, line-collapsed
    -- true if title/body matches
    -- /amenz|sancțiun|sancțiun|sistare|confiscat/i
    is_enforcement   BOOLEAN     NOT NULL DEFAULT false,
    total_amenzi_lei NUMERIC,                            -- sum mentioned in article (best-effort)
    raw_hash         TEXT        NOT NULL,               -- sha1(continut_text) for change detection
    fetched_at       TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_gnm_com_publicat
    ON gnm.comunicate (publicat_la DESC);
-- Partial index: only rows flagged as enforcement articles.
CREATE INDEX IF NOT EXISTS idx_gnm_com_enforcement
    ON gnm.comunicate (is_enforcement)
    WHERE is_enforcement;
CREATE INDEX IF NOT EXISTS idx_gnm_com_total_amenzi
    ON gnm.comunicate (total_amenzi_lei DESC NULLS LAST);

COMMENT ON TABLE gnm.comunicate IS
    'GNM press releases (gnm.ro/feed/). Source-of-truth raw archive. Articles flagged is_enforcement when text mentions fines/sanctions; gnm.amenzi_extrase populated by NLP-light extractor.';
-- ── 2. Extracted violator records ──────────────────────────────────────────
-- One row per (article × candidate firm) tuple identified by the regex/NER
-- pass. Most enforcement articles name 0-3 firms; some name none (collective
-- references like "operatori industriali din Prahova").
--
-- contravenient_cui is filled by the Stage B fuzzy match against
-- firms.cui_lookup, using contravenient_name + a judet hint. A score ≥ 0.85
-- is considered acceptable.
CREATE TABLE IF NOT EXISTS gnm.amenzi_extrase (
    id                      BIGSERIAL   PRIMARY KEY,
    -- Parent article; extracted rows are deleted together with it.
    comunicat_id            BIGINT      NOT NULL
                                REFERENCES gnm.comunicate(id) ON DELETE CASCADE,
    contravenient_name      TEXT        NOT NULL,        -- raw mention (e.g. "Rafinăria Petrobrazi")
    contravenient_name_norm TEXT,                        -- firms.normalize_company_name(); NULL until Stage B
    contravenient_cui       TEXT,                        -- fuzzy-matched, NULL when unmatched
    cui_match_method        TEXT,                        -- 'direct' | 'fuzzy_name' | 'fuzzy_name_judet' | NULL
    cui_match_score         NUMERIC,                     -- 0..1
    matched_at              TIMESTAMPTZ,
    judet                   TEXT,                        -- inferred from article title/body
    fapta                   TEXT,                        -- short violation description (extracted snippet)
    suma_lei                NUMERIC,                     -- per-firm amount when present, NULL when only aggregate
    suma_eur                NUMERIC,                     -- when source quotes EUR (rare)
    -- true when the amount applies to more than one firm collectively
    suma_aggregate          BOOLEAN     NOT NULL DEFAULT false,
    context_snippet         TEXT        NOT NULL,        -- the sentence(s) that triggered extraction
    fetched_at              TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_cui
    ON gnm.amenzi_extrase (contravenient_cui)
    WHERE contravenient_cui IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_name_norm
    ON gnm.amenzi_extrase (contravenient_name_norm);
CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_judet
    ON gnm.amenzi_extrase (judet);
CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_suma
    ON gnm.amenzi_extrase (suma_lei DESC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_gnm_amenzi_comunicat
    ON gnm.amenzi_extrase (comunicat_id);

COMMENT ON TABLE gnm.amenzi_extrase IS
    'Best-effort extracted violator tuples from gnm.comunicate. Coverage is partial — only firms named in press releases. Use gnm.comunicate.is_enforcement for full enforcement-article archive.';
-- ── 3. Scrape log (mirrors anre/ancom convention) ──────────────────────────
-- One row per scraper run: row counters, timing, and an optional error
-- message.
CREATE TABLE IF NOT EXISTS gnm.scrape_log (
    id            BIGSERIAL   PRIMARY KEY,
    scraper       TEXT        NOT NULL,                  -- 'rss_feed' | 'extractor'
    source_url    TEXT        NOT NULL,
    rows_seen     INTEGER     NOT NULL DEFAULT 0,
    rows_inserted INTEGER     NOT NULL DEFAULT 0,
    rows_updated  INTEGER     NOT NULL DEFAULT 0,
    rows_skipped  INTEGER     NOT NULL DEFAULT 0,
    duration_ms   INTEGER     NOT NULL DEFAULT 0,
    -- started_at has no default and must be supplied by the writer;
    -- finished_at defaults to the insert time.
    started_at    TIMESTAMPTZ NOT NULL,
    finished_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    error         TEXT
);

CREATE INDEX IF NOT EXISTS idx_gnm_scrape_log_started
    ON gnm.scrape_log (started_at DESC);
@@ -0,0 +1,98 @@
-- 038_bugetar_cui_match_stage_b.sql
-- Stage B (fuzzy) CUI match for bugetar.entitate using judet constraint.
-- After this runs, expected post-match coverage: 58% → ~70-75% (estimate
-- ~1,500 new matches added via trgm_judet method at threshold 0.78).
--
-- IMPORTANT: this is a LONG-RUNNING SQL (~30-60 min on full table) because
-- it does fuzzy matching across ~7,857 unmatched bugetar rows × millions of
-- firms.entities rows. It's batched per judet to keep the query plan tractable.
--
-- Idempotent — UPDATEs only WHERE cui IS NULL.
-- Run via: psql -f 038_bugetar_cui_match_stage_b.sql
-- Lookup: judet code → uppercase ASCII county name, in the form produced
-- from firms.adr_judet after diacritics are stripped (firms uses the old
-- cedilla characters Ş/Ţ).
DROP TABLE IF EXISTS tmp_judet_map;

CREATE TEMP TABLE tmp_judet_map (
    code       TEXT PRIMARY KEY,
    name_ascii TEXT NOT NULL
);

INSERT INTO tmp_judet_map (code, name_ascii)
VALUES
    ('AB', 'ALBA'),      ('AG', 'ARGES'),         ('AR', 'ARAD'),
    ('B',  'MUNICIPIUL BUCURESTI'),
    ('BC', 'BACAU'),     ('BH', 'BIHOR'),         ('BN', 'BISTRITA-NASAUD'),
    ('BR', 'BRAILA'),    ('BT', 'BOTOSANI'),      ('BV', 'BRASOV'),
    ('BZ', 'BUZAU'),     ('CJ', 'CLUJ'),          ('CL', 'CALARASI'),
    ('CS', 'CARAS-SEVERIN'),                      ('CT', 'CONSTANTA'),
    ('CV', 'COVASNA'),   ('DB', 'DAMBOVITA'),     ('DJ', 'DOLJ'),
    ('GJ', 'GORJ'),      ('GL', 'GALATI'),        ('GR', 'GIURGIU'),
    ('HD', 'HUNEDOARA'), ('HR', 'HARGHITA'),      ('IF', 'ILFOV'),
    ('IL', 'IALOMITA'),  ('IS', 'IASI'),          ('MH', 'MEHEDINTI'),
    ('MM', 'MARAMURES'), ('MS', 'MURES'),         ('NT', 'NEAMT'),
    ('OT', 'OLT'),       ('PH', 'PRAHOVA'),       ('SB', 'SIBIU'),
    ('SJ', 'SALAJ'),     ('SM', 'SATU MARE'),     ('SV', 'SUCEAVA'),
    ('TL', 'TULCEA'),    ('TM', 'TIMIS'),         ('TR', 'TELEORMAN'),
    ('VL', 'VALCEA'),    ('VN', 'VRANCEA'),       ('VS', 'VASLUI');
-- Materialize firms.entities with an ASCII-normalized, uppercase adr_judet
-- so the trigram join does not recompute translate() per probe.
--
-- The translate() map covers both cases of every Romanian diacritic, in both
-- the cedilla (Ş/Ţ) and comma-below (Ș/Ț) encodings. Lowercase forms must be
-- mapped here as well: UPPER() runs after translate(), so an unmapped
-- lowercase ş/ț would become Ş/Ț instead of S/T and the row would never
-- equal tmp_judet_map.name_ascii.
DROP TABLE IF EXISTS tmp_firms_by_judet;
CREATE TEMP TABLE tmp_firms_by_judet AS
SELECT cui, name, name_normalized,
       UPPER(translate(COALESCE(adr_judet, ''),
                       'ŞȘŢȚĂÂÎşșţțăâî',
                       'SSTTAAISSTTAAI')) AS judet_ascii
FROM firms.entities
WHERE name_normalized IS NOT NULL AND adr_judet IS NOT NULL;
-- Equality probe on judet, trigram probe on the normalized name.
CREATE INDEX ON tmp_firms_by_judet (judet_ascii);
CREATE INDEX ON tmp_firms_by_judet USING gin (name_normalized gin_trgm_ops);
-- Temp tables are not auto-analyzed; gather stats before the join below.
ANALYZE tmp_firms_by_judet;
-- Snapshot of bugetar.entitate rows that still lack a CUI, with the
-- normalized entity name and the ASCII judet name expected on the firms
-- side. Rows whose judet code is not in tmp_judet_map are dropped by the
-- inner join.
DROP TABLE IF EXISTS tmp_bugetar_unmatched;

CREATE TEMP TABLE tmp_bugetar_unmatched AS
SELECT
    ent.id,
    ent.entity_name,
    map.name_ascii                                 AS expected_judet,
    firms.normalize_company_name(ent.entity_name)  AS bn_norm
FROM bugetar.entitate AS ent
INNER JOIN tmp_judet_map AS map
    ON map.code = ent.judet
WHERE ent.cui IS NULL;

CREATE INDEX ON tmp_bugetar_unmatched (expected_judet);
-- Per-judet UPDATE loop (sequential, one judet at a time → bounded planner
-- cost). The % operator below uses this session-level threshold.
SET pg_trgm.similarity_threshold = 0.78;

DO $$
DECLARE
    v_judet record;          -- current (code, name_ascii) pair
    v_rows  integer;         -- rows updated for the current judet
    v_total integer := 0;    -- running total across all judete
BEGIN
    FOR v_judet IN
        SELECT code, name_ascii
        FROM tmp_judet_map
        ORDER BY code
    LOOP
        -- For every unmatched entity in this judet keep only the single
        -- most similar firm located in the same judet.
        WITH best_match AS (
            SELECT DISTINCT ON (u.id)
                   u.id,
                   f.cui,
                   similarity(f.name_normalized, u.bn_norm) AS score
            FROM tmp_bugetar_unmatched AS u
            INNER JOIN tmp_firms_by_judet AS f
                ON f.judet_ascii = v_judet.name_ascii
               AND f.name_normalized % u.bn_norm
            WHERE u.expected_judet = v_judet.name_ascii
            ORDER BY u.id, similarity(f.name_normalized, u.bn_norm) DESC
        )
        UPDATE bugetar.entitate AS b
           SET cui              = m.cui,
               cui_match_score  = m.score::numeric(5,2),
               cui_match_method = 'trgm_judet',
               updated_at       = now()
          FROM best_match AS m
         WHERE b.id = m.id
           AND b.cui IS NULL      -- never overwrite an existing match
           AND m.score >= 0.78;

        GET DIAGNOSTICS v_rows = ROW_COUNT;
        v_total := v_total + v_rows;
        RAISE NOTICE ' judet=% (%) matched=%', v_judet.code, v_judet.name_ascii, v_rows;
    END LOOP;

    RAISE NOTICE '=== total trgm_judet matches: % ===', v_total;
END $$;
-- Summary: matched rows per match method with the average score, most
-- frequent method first.
SELECT
    cui_match_method,
    count(*),
    round(avg(cui_match_score)::numeric, 2) AS avg_score
FROM bugetar.entitate
WHERE cui IS NOT NULL
GROUP BY cui_match_method
ORDER BY count(*) DESC;

-- Drop the working tables created by this script.
DROP TABLE tmp_judet_map, tmp_firms_by_judet, tmp_bugetar_unmatched;
@@ -0,0 +1,62 @@
-- 039_bugetar_uat_pattern_match.sql
-- High-precision CUI match for bugetar.entitate UAT entries by stripping
-- ONRC parenthetical suffix "X (PRIMARIA Y)" and comparing normalized names.
--
-- Replaces the trgm-based 038 attempt which was too slow (90+ min, low yield)
-- and hit false positives on the COMUNA-PRIMARIA naming gap.
--
-- Insight: ONRC stores comune/orașe with parenthetical suffix:
-- "Comuna Surduc (Primaria Comunei Surduc)"
-- "COMUNA CIZER (PRIMARIA)"
-- "Comuna Mesesenii de Jos (Primaria Mesesenii de Jos Jud. Salaj)"
-- Bugetar entries are clean: "COMUNA SURDUC". Stripping ' (...)' from ONRC
-- name and comparing normalized → exact match → high-confidence resolve.
--
-- Idempotent: UPDATEs only WHERE cui IS NULL. Threshold-agnostic.
-- Test runtime: ~1.7s per judet, 42 judete → ~70s total.
\timing on
SET pg_trgm.similarity_threshold = 0.78; -- not used here but reset for safety

-- Resolve bugetar.entitate.cui for rows that still have none, by comparing
-- the normalized bugetar name with the normalized ONRC name after stripping
-- the ONRC " (...)" suffix, restricted to firms located in the same judet.
--
-- The translate() map covers both cases of every Romanian diacritic, in both
-- the cedilla (Ş/Ţ) and comma-below (Ș/Ț) encodings. Lowercase forms must be
-- mapped too: UPPER() runs after translate(), so an unmapped lowercase ş/ț
-- would become Ş/Ț instead of S/T and the judet comparison would fail.
WITH judet_map AS (
    -- judet code → uppercase ASCII county name
    SELECT * FROM (VALUES
        ('AB','ALBA'),('AG','ARGES'),('AR','ARAD'),('B','MUNICIPIUL BUCURESTI'),
        ('BC','BACAU'),('BH','BIHOR'),('BN','BISTRITA-NASAUD'),('BR','BRAILA'),
        ('BT','BOTOSANI'),('BV','BRASOV'),('BZ','BUZAU'),('CJ','CLUJ'),
        ('CL','CALARASI'),('CS','CARAS-SEVERIN'),('CT','CONSTANTA'),('CV','COVASNA'),
        ('DB','DAMBOVITA'),('DJ','DOLJ'),('GJ','GORJ'),('GL','GALATI'),
        ('GR','GIURGIU'),('HD','HUNEDOARA'),('HR','HARGHITA'),('IF','ILFOV'),
        ('IL','IALOMITA'),('IS','IASI'),('MH','MEHEDINTI'),('MM','MARAMURES'),
        ('MS','MURES'),('NT','NEAMT'),('OT','OLT'),('PH','PRAHOVA'),
        ('SB','SIBIU'),('SJ','SALAJ'),('SM','SATU MARE'),('SV','SUCEAVA'),
        ('TL','TULCEA'),('TM','TIMIS'),('TR','TELEORMAN'),('VL','VALCEA'),
        ('VN','VRANCEA'),('VS','VASLUI')
    ) AS m(code, name_ascii)
),
candidates AS (
    SELECT DISTINCT ON (b.id) b.id, e.cui, e.name AS firm_name
    FROM bugetar.entitate b
    JOIN judet_map jm ON jm.code = b.judet
    JOIN firms.entities e
      ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) = firms.normalize_company_name(b.entity_name)
     AND UPPER(translate(COALESCE(e.adr_judet, ''),
                         'ŞȘŢȚĂÂÎşșţțăâî',
                         'SSTTAAISSTTAAI')) = jm.name_ascii
    WHERE b.cui IS NULL
    -- Deterministic when several firms share the normalized stripped name:
    -- the lowest cui wins.
    -- NOTE(review): such a tie is still recorded with score 0.95 — consider
    -- skipping ambiguous rows instead.
    ORDER BY b.id, e.cui
)
UPDATE bugetar.entitate b
   SET cui = c.cui,
       cui_match_score = 0.95::numeric(5,2), -- high-confidence stripped-exact match
       cui_match_method = 'uat_pattern',
       updated_at = now()
  FROM candidates c
 WHERE b.id = c.id AND b.cui IS NULL;
-- Summary: matched rows per match method with the average score, most
-- frequent method first.
SELECT
    cui_match_method,
    count(*),
    round(avg(cui_match_score)::numeric, 2) AS avg_score
FROM bugetar.entitate
WHERE cui IS NOT NULL
GROUP BY cui_match_method
ORDER BY count(*) DESC;

-- Rows still without a CUI.
SELECT
    'unmatched' AS t,
    count(*)
FROM bugetar.entitate
WHERE cui IS NULL;
@@ -0,0 +1,105 @@
-- 040_curteacont_uat_pattern_match.sql
-- High-precision CUI match for curteacont.rapoarte using UAT-pattern + strip-parens.
--
-- Curtea de Conturi uses specific abbreviations for territorial units:
-- UATC X → COMUNA X
-- UATJ X → JUDETUL X
-- UATO X → ORASUL X / ORAS X
-- UATM X → MUNICIPIUL X
-- UAT SECTOR N (... BUCURESTI) → SECTOR N
--
-- ONRC stores these with parenthetical suffix indicating the operating body:
-- "JUDETUL MARAMURES (CONSILIUL JUDETEAN MARAMURES)"
-- "Comuna Surduc (Primaria Comunei Surduc)"
-- "SECTOR 3 (PRIMARIA SECTOR 3 BUCURESTI)"
--
-- Strip ONRC " (...)" suffix and compare normalized → exact match.
--
-- Idempotent: UPDATEs only WHERE audited_entity_cui IS NULL.
\timing on
SET pg_trgm.similarity_threshold = 0.78; -- safety reset

-- Build a small prefiltered firms table once (UATs only ~10K rows).
--
-- norm_stripped is the normalized ONRC name with the " (...)" suffix removed
-- and the articulated prefixes canonicalized (ORASUL → ORAS,
-- SECTORUL → SECTOR). The audit side only ever generates 'ORAS X' and
-- 'SECTOR N', so without this canonicalization the 'ORASUL …' and
-- 'SECTORUL …' rows admitted by the prefilter could never join.
DROP TABLE IF EXISTS tmp_firms_uat;
CREATE TEMP TABLE tmp_firms_uat AS
SELECT cui, name,
       firms.normalize_company_name(
           regexp_replace(
               regexp_replace(
                   regexp_replace(name, '\s*\(.*$', ''),
                   '^ORASUL\s+', 'ORAS ', 'i'),
               '^SECTORUL\s+', 'SECTOR ', 'i')
       ) AS norm_stripped
FROM firms.entities
WHERE name ~* '^(COMUNA |JUDETUL |ORAS |ORASUL |MUNICIPIUL |SECTOR(UL)? [1-6])';
CREATE INDEX ON tmp_firms_uat (norm_stripped);
-- Temp tables are not auto-analyzed; gather stats before the join.
ANALYZE tmp_firms_uat;
-- For every audit report that has no CUI yet and whose audited entity name
-- starts with a Curtea de Conturi UAT abbreviation, derive the normalized
-- name expected on the ONRC side (UATC → COMUNA, UATJ → JUDETUL,
-- UATO → ORAS, UATM → MUNICIPIUL, UAT SECTOR[UL] N → SECTOR N).
DROP TABLE IF EXISTS tmp_cdc_uat;

CREATE TEMP TABLE tmp_cdc_uat AS
SELECT
    r.slug_id,
    r.audited_entity_name,
    firms.normalize_company_name(
        CASE
            WHEN r.audited_entity_name ~* '^UATC '
                THEN 'COMUNA ' || regexp_replace(r.audited_entity_name, '^UATC\s+', '', 'i')
            WHEN r.audited_entity_name ~* '^UATJ '
                THEN 'JUDETUL ' || regexp_replace(r.audited_entity_name, '^UATJ\s+', '', 'i')
            WHEN r.audited_entity_name ~* '^UATO '
                THEN 'ORAS ' || regexp_replace(r.audited_entity_name, '^UATO\s+', '', 'i')
            WHEN r.audited_entity_name ~* '^UATM '
                THEN 'MUNICIPIUL ' || regexp_replace(r.audited_entity_name, '^UATM\s+', '', 'i')
            WHEN r.audited_entity_name ~* '^UAT SECTOR(UL)? [1-6]'
                -- keep only the sector digit
                THEN 'SECTOR ' || substring(r.audited_entity_name FROM '^UAT SECTOR(?:UL)? ([1-6])')
            ELSE NULL
        END
    ) AS expected_norm
FROM curteacont.rapoarte AS r
WHERE r.audited_entity_cui IS NULL
  AND r.audited_entity_name IS NOT NULL
  AND r.audited_entity_name ~* '^(UATC |UATJ |UATO |UATM |UAT SECTOR)';

-- Pre-update count of candidate rows with a usable expected name.
SELECT count(*) AS unmapped_uat_rows
FROM tmp_cdc_uat
WHERE expected_norm IS NOT NULL;
-- Apply the UAT match: exact equality between the expected ONRC-form name
-- and the stripped firm name. When several firms share the name, the lowest
-- cui is taken so the result is deterministic.
WITH matched AS (
    SELECT DISTINCT ON (cdc.slug_id)
           cdc.slug_id,
           uat.cui
    FROM tmp_cdc_uat AS cdc
    INNER JOIN tmp_firms_uat AS uat
        ON uat.norm_stripped = cdc.expected_norm
    ORDER BY cdc.slug_id, uat.cui
)
UPDATE curteacont.rapoarte AS r
   SET audited_entity_cui = m.cui,
       -- stamps parsed_at only if it was not already set
       parsed_at          = COALESCE(r.parsed_at, now())
  FROM matched AS m
 WHERE r.slug_id = m.slug_id
   AND r.audited_entity_cui IS NULL;
-- Fallback exact-match path for non-UAT names (ministries etc.): compare the
-- normalized audited_entity_name with the normalized firms.entities.name
-- after stripping its " (...)" suffix. When several firms share the name,
-- the lowest cui is taken so the result is deterministic.
WITH pending AS (
    SELECT
        r.slug_id,
        r.audited_entity_name,
        firms.normalize_company_name(r.audited_entity_name) AS norm
    FROM curteacont.rapoarte AS r
    WHERE r.audited_entity_cui IS NULL
      AND r.audited_entity_name IS NOT NULL
      AND r.audited_entity_name !~* '^(UATC |UATJ |UATO |UATM |UAT SECTOR)'
),
matched AS (
    SELECT DISTINCT ON (p.slug_id)
           p.slug_id,
           e.cui
    FROM pending AS p
    INNER JOIN firms.entities AS e
        ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) = p.norm
    ORDER BY p.slug_id, e.cui
)
UPDATE curteacont.rapoarte AS r
   SET audited_entity_cui = m.cui,
       -- stamps parsed_at only if it was not already set
       parsed_at          = COALESCE(r.parsed_at, now())
  FROM matched AS m
 WHERE r.slug_id = m.slug_id
   AND r.audited_entity_cui IS NULL;
-- Final stats: total reports, reports with a resolved CUI, and coverage %.
-- NULLIF guards the division so an empty curteacont.rapoarte yields a NULL
-- pct instead of raising "division by zero".
SELECT count(*) AS total,
       count(audited_entity_cui) AS with_cui,
       round(100.0 * count(audited_entity_cui) / NULLIF(count(*), 0), 1) AS pct
FROM curteacont.rapoarte;

-- No materialized view is defined for curteacont yet and the autoritate
-- profile reads the table live, so there is nothing to refresh here.

-- Drop the working tables created by this script.
DROP TABLE tmp_firms_uat, tmp_cdc_uat;
@@ -0,0 +1,87 @@
-- 041_curteacont_cleaned_name_match.sql
-- Follow-up to 040 — handles the residual 131 rows whose audited_entity_name
-- contains the prefix " ) privind raportul de audit al performantei nr.X, ENTITY"
-- (scraper bug: parser kept the prefix instead of just the entity).
--
-- Extract the entity name via split-on-last-comma, then retry both
-- UAT-pattern and strip-parens match.
--
-- Source bug should also be fixed in services/seap-scraper/src/scrape-curteacont.ts
-- but that's a separate task; SQL repair lands the data improvement immediately.
\timing on
-- Stage the residual reports whose name still carries the scraper prefix
-- ") privind raportul ...". clean_name is the trimmed text after the LAST
-- comma: reversing the string makes that segment the first split_part field.
DROP TABLE IF EXISTS tmp_cdc_residue;
CREATE TEMPORARY TABLE tmp_cdc_residue AS
SELECT
    rep.slug_id,
    rep.audited_entity_name,
    trim(reverse(split_part(reverse(rep.audited_entity_name), ',', 1))) AS clean_name
FROM curteacont.rapoarte AS rep
WHERE rep.audited_entity_cui IS NULL
  AND rep.audited_entity_name IS NOT NULL
  AND rep.audited_entity_name ~ '\) privind raportul';
-- Pass 1: retry the UAT-pattern match on the cleaned names.
-- The UATC/UATJ/UATO/UATM/UAT SECTOR abbreviation is expanded to the prefix
-- used by firms.entities names (COMUNA / JUDETUL / ORAS / MUNICIPIUL / SECTOR n)
-- before normalizing; names matching none of the patterns yield NULL.
-- NOTE(review): if several firms share the normalized name the smallest CUI is
-- taken — an arbitrary tie-break; confirm it is acceptable.
WITH expected AS (
    SELECT
        res.slug_id,
        res.clean_name,
        firms.normalize_company_name(
            CASE
                WHEN res.clean_name ~* '^UATC ' THEN 'COMUNA ' || regexp_replace(res.clean_name, '^UATC\s+', '', 'i')
                WHEN res.clean_name ~* '^UATJ ' THEN 'JUDETUL ' || regexp_replace(res.clean_name, '^UATJ\s+', '', 'i')
                WHEN res.clean_name ~* '^UATO ' THEN 'ORAS ' || regexp_replace(res.clean_name, '^UATO\s+', '', 'i')
                WHEN res.clean_name ~* '^UATM ' THEN 'MUNICIPIUL ' || regexp_replace(res.clean_name, '^UATM\s+', '', 'i')
                WHEN res.clean_name ~* '^UAT SECTOR(UL)? [1-6]'
                    THEN 'SECTOR ' || substring(res.clean_name FROM '^UAT SECTOR(?:UL)? ([1-6])')
                ELSE NULL
            END
        ) AS expected_norm
    FROM tmp_cdc_residue AS res
),
uat_firms AS (
    SELECT
        ent.cui,
        firms.normalize_company_name(regexp_replace(ent.name, '\s*\(.*$', '')) AS norm_stripped
    FROM firms.entities AS ent
    WHERE ent.name ~* '^(COMUNA |JUDETUL |ORAS |ORASUL |MUNICIPIUL |SECTOR(UL)? [1-6])'
),
best_match AS (
    SELECT
        want.slug_id,
        min(uat.cui) AS cui
    FROM expected AS want
    INNER JOIN uat_firms AS uat
        ON uat.norm_stripped = want.expected_norm
    WHERE want.expected_norm IS NOT NULL
    GROUP BY want.slug_id
)
UPDATE curteacont.rapoarte AS report
SET audited_entity_cui = best_match.cui,
    -- also repair the stored name: keep only the text after the last comma
    audited_entity_name = trim(reverse(split_part(reverse(report.audited_entity_name), ',', 1))),
    parsed_at = COALESCE(report.parsed_at, now())
FROM best_match
WHERE report.slug_id = best_match.slug_id
  AND report.audited_entity_cui IS NULL;
-- Pass 2: exact match between the cleaned report name and the firm name, both
-- with everything from the first "(" onward removed and then normalized.
-- Reads curteacont.rapoarte again (not the temp table), so rows already fixed
-- by pass 1 no longer qualify.
-- NOTE(review): ties resolve to the smallest CUI — arbitrary; confirm.
WITH residue AS (
    SELECT
        rep.slug_id,
        trim(reverse(split_part(reverse(rep.audited_entity_name), ',', 1))) AS clean_name
    FROM curteacont.rapoarte AS rep
    WHERE rep.audited_entity_cui IS NULL
      AND rep.audited_entity_name ~ '\) privind raportul'
),
best_match AS (
    SELECT
        res.slug_id,
        min(ent.cui) AS cui
    FROM residue AS res
    INNER JOIN firms.entities AS ent
        ON firms.normalize_company_name(regexp_replace(ent.name, '\s*\(.*$', ''))
         = firms.normalize_company_name(regexp_replace(res.clean_name, '\s*\(.*$', ''))
    GROUP BY res.slug_id
)
UPDATE curteacont.rapoarte AS report
SET audited_entity_cui = best_match.cui,
    -- store the cleaned name (text after the last comma) alongside the CUI
    audited_entity_name = trim(reverse(split_part(reverse(report.audited_entity_name), ',', 1))),
    parsed_at = COALESCE(report.parsed_at, now())
FROM best_match
WHERE report.slug_id = best_match.slug_id
  AND report.audited_entity_cui IS NULL;
-- Final stats: total reports, reports carrying a CUI, and the coverage percentage.
-- NULLIF makes pct NULL on an empty table instead of raising division by zero.
SELECT count(*) AS total,
       count(audited_entity_cui) AS with_cui,
       round(100.0 * count(audited_entity_cui) / NULLIF(count(*), 0), 1) AS pct
FROM curteacont.rapoarte;
-- Drop the staging table created at the top of this script.
DROP TABLE tmp_cdc_residue;
@@ -0,0 +1,101 @@
-- 042_cnsc_authority_cui_match.sql
-- Backfill authority_cuis array on cnsc.decizii using the strip-parens +
-- UAT-pattern strategy proven by bugetar 039 + curteacont 040.
--
-- Current state: 29,488 decizii, 12,527 (42%) have authority_cuis populated
-- via the scraper's authority_cui_raw extraction. Remaining 16,961 (58%)
-- have authority_name but no CUI.
--
-- CNSC names use these patterns:
-- COMUNA X / ORASUL X / MUNICIPIUL X / JUDETUL X → UAT direct
-- PRIMARIA COMUNEI X / PRIMARIA X → strip PRIMARIA, try UAT
-- CONSILIUL JUDETEAN X / CJ X → "CONSILIUL JUDETEAN X" / "JUDETUL X"
-- <COMPANY NAME> SA / SRL → direct firm name match
-- <institution> — strip-parens fallback
--
-- This SQL UPDATEs authority_cuis = ARRAY[cui]::text[] when a match is found.
-- cnsc.mv_per_authority_cui is refreshed at the end of this script so it picks up the new CUIs.
--
-- Idempotent: only updates rows where authority_cuis IS NULL or empty.
\timing on
-- UAT firm cache, built once and shared by the matching passes below.
-- Holds firms whose name starts with a UAT / local-administration prefix,
-- together with the normalized name after removing everything from the first
-- "(" onward. Indexed and analyzed because passes 2 and 3 join on norm_stripped.
DROP TABLE IF EXISTS tmp_firms_uat;
CREATE TEMPORARY TABLE tmp_firms_uat AS
SELECT
    ent.cui,
    ent.name,
    firms.normalize_company_name(regexp_replace(ent.name, '\s*\(.*$', '')) AS norm_stripped
FROM firms.entities AS ent
WHERE ent.name ~* '^(COMUNA |JUDETUL |ORAS |ORASUL |MUNICIPIUL |SECTOR(UL)? [1-6]|CONSILIUL JUDETEAN |PRIMARIA )';
CREATE INDEX ON tmp_firms_uat (norm_stripped);
ANALYZE tmp_firms_uat;
-- Stage every decision that has an authority_name but no authority CUI, with
-- three candidate normalized forms used by the passes below:
--   direct_norm            the authority name as-is;
--   primaria_stripped_norm the UAT behind a "PRIMARIA ..." name;
--   cj_norm                "JUDETUL X" for "CONSILIUL JUDETEAN X" / "CJ X"
--                          (NULL for every other name).
--
-- Genitive forms are translated to the nominative prefix that the UAT firm
-- cache is built from (PRIMARIA COMUNEI X -> COMUNA X, ORASULUI -> ORAS,
-- MUNICIPIULUI -> MUNICIPIUL, JUDETULUI -> JUDETUL). Previously the genitive
-- word was dropped, leaving a bare "X" that cannot equal a name starting with
-- COMUNA/ORAS/..., so pass 2 could not match those rows.
-- NOTE(review): 'ORAS ' follows the UATO mapping used by the curteacont
-- scripts; whether normalize_company_name also unifies ORAS/ORASUL is not
-- visible here — confirm.
DROP TABLE IF EXISTS tmp_cnsc_unmatched;
CREATE TEMP TABLE tmp_cnsc_unmatched AS
SELECT
    id,
    authority_name,
    firms.normalize_company_name(authority_name) AS direct_norm,
    firms.normalize_company_name(
        CASE
            WHEN authority_name ~* '^PRIMARIA\s+COMUNEI\s+'
                THEN 'COMUNA ' || regexp_replace(authority_name, '^PRIMARIA\s+COMUNEI\s+', '', 'i')
            WHEN authority_name ~* '^PRIMARIA\s+ORASULUI\s+'
                THEN 'ORAS ' || regexp_replace(authority_name, '^PRIMARIA\s+ORASULUI\s+', '', 'i')
            WHEN authority_name ~* '^PRIMARIA\s+MUNICIPIULUI\s+'
                THEN 'MUNICIPIUL ' || regexp_replace(authority_name, '^PRIMARIA\s+MUNICIPIULUI\s+', '', 'i')
            WHEN authority_name ~* '^PRIMARIA\s+JUDETULUI\s+'
                THEN 'JUDETUL ' || regexp_replace(authority_name, '^PRIMARIA\s+JUDETULUI\s+', '', 'i')
            -- "PRIMARIA X", "PRIMARIA COMUNA X", ...: drop only the PRIMARIA word.
            -- Names without the prefix pass through unchanged, so pass 2 skips
            -- them via its primaria_stripped_norm <> direct_norm filter.
            ELSE regexp_replace(authority_name, '^PRIMARIA\s+', '', 'i')
        END
    ) AS primaria_stripped_norm,
    firms.normalize_company_name(
        CASE
            WHEN authority_name ~* '^(CONSILIUL JUDETEAN|CJ)\s+'
                THEN 'JUDETUL ' || regexp_replace(authority_name, '^(CONSILIUL JUDETEAN|CJ)\s+', '', 'i')
            ELSE NULL
        END
    ) AS cj_norm
FROM cnsc.decizii
WHERE (authority_cuis IS NULL OR array_length(authority_cuis, 1) IS NULL)
  AND authority_name IS NOT NULL;
CREATE INDEX ON tmp_cnsc_unmatched (direct_norm);
-- Pass 1: direct match — the normalized CNSC authority_name equals a firm name
-- normalized after removing everything from the first "(" onward.
-- NOTE(review): ties between firms resolve to the smallest CUI — arbitrary; confirm.
WITH best_match AS (
    SELECT
        todo.id,
        min(ent.cui) AS cui
    FROM tmp_cnsc_unmatched AS todo
    INNER JOIN firms.entities AS ent
        ON firms.normalize_company_name(regexp_replace(ent.name, '\s*\(.*$', '')) = todo.direct_norm
    GROUP BY todo.id
)
UPDATE cnsc.decizii AS decision
SET authority_cuis = ARRAY[best_match.cui]::text[]
FROM best_match
WHERE decision.id = best_match.id
  -- idempotence guard: only fill rows whose array is still NULL or empty
  AND (decision.authority_cuis IS NULL OR array_length(decision.authority_cuis, 1) IS NULL);
-- Pass 2: PRIMARIA-stripped match against the UAT firm cache.
-- Only rows whose PRIMARIA-stripped form differs from the direct form are
-- considered; the others were already tried in pass 1.
WITH best_match AS (
    SELECT
        todo.id,
        min(uat.cui) AS cui
    FROM tmp_cnsc_unmatched AS todo
    INNER JOIN tmp_firms_uat AS uat
        ON uat.norm_stripped = todo.primaria_stripped_norm
    WHERE todo.primaria_stripped_norm <> todo.direct_norm
    GROUP BY todo.id
)
UPDATE cnsc.decizii AS decision
SET authority_cuis = ARRAY[best_match.cui]::text[]
FROM best_match
WHERE decision.id = best_match.id
  AND (decision.authority_cuis IS NULL OR array_length(decision.authority_cuis, 1) IS NULL);
-- Pass 3: county councils — match the "JUDETUL X" form derived from
-- "CONSILIUL JUDETEAN X" against the UAT firm cache.
WITH best_match AS (
    SELECT
        todo.id,
        min(uat.cui) AS cui
    FROM tmp_cnsc_unmatched AS todo
    INNER JOIN tmp_firms_uat AS uat
        ON uat.norm_stripped = todo.cj_norm
    WHERE todo.cj_norm IS NOT NULL
    GROUP BY todo.id
)
UPDATE cnsc.decizii AS decision
SET authority_cuis = ARRAY[best_match.cui]::text[]
FROM best_match
WHERE decision.id = best_match.id
  AND (decision.authority_cuis IS NULL OR array_length(decision.authority_cuis, 1) IS NULL);
-- Refresh the per-authority MV so it reflects the CUIs filled in above.
-- NOTE: per the PostgreSQL documentation, CONCURRENTLY needs a unique index on
-- the materialized view; that index is not visible in this script.
REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
-- Final stats: total decisions, decisions with at least one authority CUI, and
-- the coverage percentage. NULLIF makes pct NULL on an empty table instead of
-- raising division by zero.
SELECT
    count(*) AS total,
    count(*) FILTER (WHERE array_length(authority_cuis, 1) > 0) AS with_auth_cui,
    round(100.0 * count(*) FILTER (WHERE array_length(authority_cuis, 1) > 0) / NULLIF(count(*), 0), 1) AS pct
FROM cnsc.decizii;
-- Drop the staging tables built by this script.
DROP TABLE tmp_firms_uat, tmp_cnsc_unmatched;
@@ -0,0 +1,87 @@
-- 043_red_flags_kpi_snapshot.sql
-- Materialize red-flags landing KPI counters to a static refresh table.
-- Original page was 30s with all KPI INTERSECTs + 13 recipe fetches running.
-- KPI INTERSECTs (TRIPLE/QUADRA pipe etc.) alone = 12s. Now ~1ms read.
--
-- Refresh: nightly via mvs cron at 04:00 (or add to refresh-mvs.sh).
-- Schema holding the precomputed KPI snapshot tables.
CREATE SCHEMA IF NOT EXISTS public_kpi;
-- One row per KPI counter; rows are upserted by k_name in
-- public_kpi.refresh_red_flags_counts().
CREATE TABLE IF NOT EXISTS public_kpi.red_flags_counts (
-- counter key, e.g. 'regas_seap_firms' or 'triple_firms'
k_name text PRIMARY KEY,
-- counter value (a row or distinct-CUI count)
k_value bigint NOT NULL,
-- time of the refresh that last wrote this row
refreshed_at timestamptz NOT NULL DEFAULT now()
);
COMMENT ON TABLE public_kpi.red_flags_counts IS
'KPI counters surfaced on /achizitii/red-flags landing. Refreshed nightly. Page reads as a single SELECT to avoid 12s INTERSECT cost.';
-- Refresh function, called by cron: recomputes all ten red-flags KPI counters
-- and upserts them by k_name. Every row is stamped with the same refresh time.
CREATE OR REPLACE FUNCTION public_kpi.refresh_red_flags_counts() RETURNS void AS $$
DECLARE
    v_stamp timestamptz := now();
BEGIN
    INSERT INTO public_kpi.red_flags_counts (k_name, k_value, refreshed_at)
    SELECT kpi.k_name, kpi.k_value, v_stamp
    FROM (
        VALUES
            -- distinct regas.ajutoare CUIs that also appear as SEAP suppliers
            ('regas_seap_firms',
             (SELECT count(DISTINCT aj.cui)
              FROM regas.ajutoare AS aj
              WHERE EXISTS (SELECT 1 FROM seap.announcements AS ann
                            WHERE ann.supplier_cui = aj.cui))),
            -- regas.ajutoare rows (not firms) whose CUI is a SEAP supplier
            ('regas_seap_records',
             (SELECT count(*)
              FROM regas.ajutoare AS aj
              WHERE EXISTS (SELECT 1 FROM seap.announcements AS ann
                            WHERE ann.supplier_cui = aj.cui))),
            -- distinct fonduri.beneficiar_anunt CUIs that are SEAP suppliers
            ('ben_seap_firms',
             (SELECT count(DISTINCT ben.cui)
              FROM fonduri.beneficiar_anunt AS ben
              WHERE EXISTS (SELECT 1 FROM seap.announcements AS ann
                            WHERE ann.supplier_cui = ben.cui))),
            -- ANAF debtors with a SEAP award above 100000 published after the
            -- debtor list's publication date
            ('datornic_seap_firms',
             (SELECT count(DISTINCT deb.cui)
              FROM anaf.datornici AS deb
              INNER JOIN seap.announcements AS ann
                  ON ann.supplier_cui = deb.cui
              WHERE ann.publication_date::date > deb.publication_date
                AND ann.awarded_value > 100000)),
            -- AEP legal-entity donors with at least one SEAP award above 100000
            ('aep_seap_firms',
             (SELECT count(DISTINCT don.donator_cui)
              FROM aep.donatii_pj AS don
              WHERE EXISTS (SELECT 1 FROM seap.announcements AS ann
                            WHERE ann.supplier_cui = don.donator_cui
                              AND ann.awarded_value > 100000))),
            -- CUIs present in beneficiar_anunt AND SEAP suppliers AND afir_plati
            ('triple_firms',
             (SELECT count(*)
              FROM (
                  SELECT ben.cui FROM fonduri.beneficiar_anunt AS ben WHERE ben.cui IS NOT NULL
                  INTERSECT
                  SELECT ann.supplier_cui FROM seap.announcements AS ann WHERE ann.supplier_cui IS NOT NULL
                  INTERSECT
                  SELECT pay.cui FROM fonduri.afir_plati AS pay WHERE pay.cui IS NOT NULL
              ) AS in_all_three)),
            -- same as triple_firms, additionally present in regas.ajutoare
            ('quadra_firms',
             (SELECT count(*)
              FROM (
                  SELECT ben.cui FROM fonduri.beneficiar_anunt AS ben WHERE ben.cui IS NOT NULL
                  INTERSECT
                  SELECT ann.supplier_cui FROM seap.announcements AS ann WHERE ann.supplier_cui IS NOT NULL
                  INTERSECT
                  SELECT pay.cui FROM fonduri.afir_plati AS pay WHERE pay.cui IS NOT NULL
                  INTERSECT
                  SELECT aj.cui FROM regas.ajutoare AS aj WHERE aj.cui IS NOT NULL
              ) AS in_all_four)),
            -- AEP donors that appear in anaf.datornici_latest
            ('donator_datornic_firms',
             (SELECT count(DISTINCT don.donator_cui)
              FROM aep.donatii_pj AS don
              INNER JOIN anaf.datornici_latest AS deb
                  ON deb.cui = don.donator_cui)),
            -- ANRE licensees with an active licence that appear in datornici_latest
            ('anre_datornic_firms',
             (SELECT count(DISTINCT lic.cui)
              FROM anre.mv_licente_per_cui AS lic
              INNER JOIN anaf.datornici_latest AS deb
                  ON deb.cui = lic.cui
              WHERE lic.nr_active > 0)),
            -- entities with >= 2 Curtea de Conturi reports in the last 5 years
            -- AND >= 3 CNSC contestations
            ('dubla_alerta_firms',
             (SELECT count(*)
              FROM (
                  SELECT rep.audited_entity_cui AS cui
                  FROM curteacont.rapoarte AS rep
                  WHERE rep.audited_entity_cui IS NOT NULL
                    AND rep.publication_date >= now() - interval '5 years'
                  GROUP BY rep.audited_entity_cui
                  HAVING count(*) >= 2
                  INTERSECT
                  SELECT mv.cui
                  FROM cnsc.mv_per_authority_cui AS mv
                  WHERE mv.contestation_count >= 3
              ) AS in_both))
    ) AS kpi (k_name, k_value)
    ON CONFLICT (k_name) DO UPDATE
        SET k_value = EXCLUDED.k_value,
            refreshed_at = EXCLUDED.refreshed_at;
END;
$$ LANGUAGE plpgsql;
-- Initial populate: run the refresh once, then list the resulting counters.
SELECT public_kpi.refresh_red_flags_counts();
SELECT
    counts.k_name,
    counts.k_value,
    counts.refreshed_at
FROM public_kpi.red_flags_counts AS counts
ORDER BY counts.k_name;
@@ -0,0 +1,628 @@
-- 044_red_flags_previews_snapshot.sql
-- Materialize TOP-5 row previews per red-flag recipe to a snapshot table.
-- Original landing was ~17s (13 recipe.fetch() calls running live, each 1-12s).
-- Now: single SELECT against this table → ~5ms.
--
-- Refresh: nightly from refresh-mvs.sh after KPI snapshot refresh.
--
-- Mirrors the RecipeRow interface (src/lib/recipes.ts):
-- primary, primaryHref, secondary, metric, metricRaw, detail, badge.{label,tone}
--
-- For each of the 13 RED_FLAG_SLUGS in src/pages/achizitii/red-flags.astro,
-- we re-implement the SQL inline (returning the formatted output the page
-- needs) and select TOP-5 per slug.
-- Schema holding the precomputed KPI snapshot tables.
CREATE SCHEMA IF NOT EXISTS public_kpi;
-- One row per (recipe slug, rank); filled by
-- public_kpi.refresh_red_flags_previews(), which deletes all rows and re-inserts
-- the top rows of each recipe.
CREATE TABLE IF NOT EXISTS public_kpi.red_flags_previews (
-- recipe slug, e.g. 'firme-quadra-pipe-public'
slug text NOT NULL,
-- rank within the recipe (the ROW_NUMBER of the recipe query, 1..5)
position smallint NOT NULL,
-- display name: the entity name, or 'CUI <cui>' when no name is known
primary_text text,
-- link target, e.g. '/achizitii/firma/<cui>' or '/achizitii/autoritate/<cui>'
primary_href text,
-- secondary line (CUI, county and recipe-specific extras)
secondary text,
-- formatted headline metric as displayed
metric text,
-- numeric value behind the headline metric
metric_raw numeric,
-- free-text breakdown line
detail text,
-- badge caption
badge_label text,
-- badge tone; the refresh function writes 'risk' or 'warn'
badge_tone text,
-- insertion time (column default; the refresh function does not set it explicitly)
computed_at timestamptz NOT NULL DEFAULT now(),
PRIMARY KEY (slug, position)
);
COMMENT ON TABLE public_kpi.red_flags_previews IS
'Top-5 row previews per red-flag recipe. Refreshed nightly via refresh-mvs.sh. Read once per landing page load instead of running 13 live cross-source queries.';
-- ─────────────────────────────────────────────────────────────────────────────
-- Helper: short-form RON amount formatter, mirroring fmtRON() in src/lib/recipes.ts.
--   NULL or <= 0      -> '0'
--   >= 1,000,000,000  -> billions with one decimal + ' mld'   ("1.2 mld")
--   >= 1,000,000      -> millions with one decimal + ' mil'   ("12.3 mil")
--   >= 1,000          -> rounded thousands + 'K'              ("456K")
--   otherwise         -> the rounded integer                  ("789")
CREATE OR REPLACE FUNCTION public_kpi.fmt_ron(v numeric) RETURNS text AS $$
BEGIN
    -- Branches are tested top-down, so each threshold only sees smaller values.
    RETURN CASE
        WHEN v IS NULL OR v <= 0 THEN '0'
        WHEN v >= 1000000000 THEN to_char(v / 1000000000.0, 'FM999990.0') || ' mld'
        WHEN v >= 1000000 THEN to_char(v / 1000000.0, 'FM999990.0') || ' mil'
        WHEN v >= 1000 THEN to_char(round(v / 1000.0), 'FM999999990') || 'K'
        ELSE to_char(round(v), 'FM999999990')
    END;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- ─────────────────────────────────────────────────────────────────────────────
CREATE OR REPLACE FUNCTION public_kpi.refresh_red_flags_previews() RETURNS void AS $$
BEGIN
DELETE FROM public_kpi.red_flags_previews;
-- ───── firme-quadra-pipe-public ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH ben AS (
SELECT b.cui, COUNT(*) AS anunturi_eu, SUM(l.buget_lei)::numeric AS buget_eu
FROM fonduri.beneficiar_anunt b
LEFT JOIN fonduri.beneficiar_anunt_lot l ON l.anunt_id = b.id
WHERE b.cui IS NOT NULL GROUP BY b.cui
),
seap AS (
SELECT supplier_cui AS cui, COUNT(*) AS contracte, SUM(awarded_value)::numeric AS valoare
FROM seap.announcements
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
GROUP BY supplier_cui
),
afir AS (
SELECT cui, COUNT(*) AS plati,
SUM(COALESCE(ue_total, feadr_total, fega_total, op_amount, 0))::numeric AS afir_lei
FROM fonduri.afir_plati WHERE cui IS NOT NULL GROUP BY cui
),
regas AS (
SELECT cui, COUNT(*) AS nr_ajutoare,
SUM(ajutor_acordat_subcategorie)::numeric AS regas_lei
FROM regas.ajutoare WHERE cui IS NOT NULL GROUP BY cui
),
ranked AS (
SELECT b.cui, e.name, e.adr_judet,
b.buget_eu, s.valoare AS seap_lei, a.afir_lei, r.regas_lei,
(COALESCE(b.buget_eu,0) + COALESCE(s.valoare,0) + COALESCE(a.afir_lei,0) + COALESCE(r.regas_lei,0))::numeric AS total_combined,
ROW_NUMBER() OVER (ORDER BY (COALESCE(b.buget_eu,0) + COALESCE(s.valoare,0) + COALESCE(a.afir_lei,0) + COALESCE(r.regas_lei,0)) DESC NULLS LAST) AS rn
FROM ben b
JOIN seap s ON s.cui = b.cui
JOIN afir a ON a.cui = b.cui
JOIN regas r ON r.cui = b.cui
JOIN firms.entities e ON e.cui = b.cui
)
SELECT 'firme-quadra-pipe-public', rn::smallint,
COALESCE(name, 'CUI ' || cui),
'/achizitii/firma/' || cui,
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
public_kpi.fmt_ron(total_combined) || ' RON',
total_combined,
'EU: ' || public_kpi.fmt_ron(COALESCE(buget_eu,0)) || ' · SEAP: ' || public_kpi.fmt_ron(COALESCE(seap_lei,0)) || ' · AFIR: ' || public_kpi.fmt_ron(COALESCE(afir_lei,0)) || ' · RegAS: ' || public_kpi.fmt_ron(COALESCE(regas_lei,0)) || ' RON',
'🔱 QUADRA pipe', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── firme-triplu-pipe-public ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH ben AS (
SELECT b.cui, COUNT(*) AS anunturi_eu, SUM(l.buget_lei)::numeric AS buget_eu
FROM fonduri.beneficiar_anunt b
LEFT JOIN fonduri.beneficiar_anunt_lot l ON l.anunt_id = b.id
WHERE b.cui IS NOT NULL GROUP BY b.cui
),
seap AS (
SELECT supplier_cui AS cui, COUNT(*) AS contracte, SUM(awarded_value)::numeric AS valoare
FROM seap.announcements
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
GROUP BY supplier_cui
),
afir AS (
SELECT cui, COUNT(*) AS plati,
SUM(COALESCE(ue_total, feadr_total, fega_total, op_amount, 0))::numeric AS afir_lei
FROM fonduri.afir_plati WHERE cui IS NOT NULL GROUP BY cui
),
ranked AS (
SELECT b.cui, e.name, e.adr_judet,
b.anunturi_eu, b.buget_eu, s.contracte, s.valoare AS seap_lei, a.plati, a.afir_lei,
(COALESCE(b.buget_eu,0) + COALESCE(s.valoare,0) + COALESCE(a.afir_lei,0))::numeric AS total_combined,
ROW_NUMBER() OVER (ORDER BY (COALESCE(b.buget_eu,0) + COALESCE(s.valoare,0) + COALESCE(a.afir_lei,0)) DESC NULLS LAST) AS rn
FROM ben b
JOIN seap s ON s.cui = b.cui
JOIN afir a ON a.cui = b.cui
JOIN firms.entities e ON e.cui = b.cui
)
SELECT 'firme-triplu-pipe-public', rn::smallint,
COALESCE(name, 'CUI ' || cui),
'/achizitii/firma/' || cui,
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
public_kpi.fmt_ron(total_combined) || ' RON',
total_combined,
'EU privat: ' || anunturi_eu || ' / ' || public_kpi.fmt_ron(COALESCE(buget_eu,0)) || ' · SEAP: ' || contracte || ' / ' || public_kpi.fmt_ron(COALESCE(seap_lei,0)) || ' · AFIR: ' || plati || ' / ' || public_kpi.fmt_ron(COALESCE(afir_lei,0)) || ' RON',
'🔱 triplu pipe', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── firme-datornice-cu-contracte-seap ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH agg AS (
SELECT d.cui, d.name,
MIN(d.publication_date) AS pub_date,
MIN(d.period_label) AS period_label,
MAX(d.debt_total) AS debt_total,
MAX(d.debtor_category) AS debtor_category,
COUNT(DISTINCT a.id) AS contracte,
SUM(a.awarded_value)::numeric AS contracte_lei,
MAX(a.publication_date::date) AS ultim_contract
FROM anaf.datornici d
JOIN seap.announcements a
ON a.supplier_cui = d.cui
AND a.publication_date::date > d.publication_date
WHERE a.awarded_value IS NOT NULL AND a.awarded_value > 0
GROUP BY d.cui, d.name
HAVING SUM(a.awarded_value) > 100000
),
ranked AS (
SELECT agg.*, e.adr_judet AS judet,
ROW_NUMBER() OVER (ORDER BY contracte_lei DESC NULLS LAST) AS rn
FROM agg LEFT JOIN firms.entities e ON e.cui = agg.cui
)
SELECT 'firme-datornice-cu-contracte-seap', rn::smallint,
COALESCE(name, 'CUI ' || cui),
'/achizitii/firma/' || cui,
'CUI ' || cui || CASE WHEN judet IS NOT NULL THEN ' · ' || judet ELSE '' END || ' · ' || COALESCE(period_label, 'T1 2016'),
public_kpi.fmt_ron(contracte_lei) || ' RON',
contracte_lei,
'Datorie ' || public_kpi.fmt_ron(COALESCE(debt_total,0)) || ' RON (' || COALESCE(debtor_category::text, '') || ') · ' || contracte || ' contracte · ultim ' || COALESCE(ultim_contract::text, '?'),
'🚨 datornic + contract', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── donatori-care-au-castigat-seap ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH don AS (
SELECT donator_cui AS cui,
MIN(donator_nume) AS donator,
COUNT(*) AS nr_donatii,
SUM(suma_lei)::numeric AS total_donatii,
array_agg(DISTINCT partid_id ORDER BY partid_id) AS partide,
MIN(an) AS prima, MAX(an) AS ultima
FROM aep.donatii_pj
WHERE donator_cui IS NOT NULL
GROUP BY donator_cui
),
seap AS (
SELECT supplier_cui AS cui,
COUNT(*) AS contracte,
SUM(awarded_value)::numeric AS contracte_lei,
COUNT(DISTINCT authority_cui) AS autoritati
FROM seap.announcements
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
GROUP BY supplier_cui
),
ranked AS (
SELECT d.cui, COALESCE(e.name, d.donator) AS firma, e.adr_judet AS judet,
d.nr_donatii, d.total_donatii, d.partide, d.prima, d.ultima,
s.contracte, s.contracte_lei, s.autoritati,
ROW_NUMBER() OVER (ORDER BY s.contracte_lei DESC NULLS LAST) AS rn
FROM don d
JOIN seap s ON s.cui = d.cui
LEFT JOIN firms.entities e ON e.cui = d.cui
WHERE s.contracte_lei > 100000
)
SELECT 'donatori-care-au-castigat-seap', rn::smallint,
COALESCE(firma, 'CUI ' || cui),
'/achizitii/firma/' || cui,
'CUI ' || cui || CASE WHEN judet IS NOT NULL THEN ' · ' || judet ELSE '' END || ' · partide: ' || array_to_string(COALESCE(partide, ARRAY[]::text[]), ', '),
public_kpi.fmt_ron(contracte_lei) || ' RON',
contracte_lei,
nr_donatii || ' donații (' || public_kpi.fmt_ron(COALESCE(total_donatii,0)) || ' RON, ' || prima || '-' || ultima || ') · ' || contracte || ' contracte la ' || autoritati || ' autorități',
'🗳️ donator + furnizor', 'warn'
FROM ranked WHERE rn <= 5;
-- ───── donatori-politici-care-datoreaza-statului ─────
-- Top 5 political donors (aep.donatii_pj) that appear in anaf.datornici_latest,
-- ranked by debt; SEAP contract totals are attached when the donor is also a supplier.
-- Fix: the donation year range in the secondary line was concatenated with an
-- empty separator (e.g. "20162020"); it now uses '-', matching the
-- donatori-care-au-castigat-seap recipe.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH aep_agg AS (
    -- per-donor donation totals, parties and first/last donation year
    SELECT donator_cui AS cui,
           SUM(suma_lei)::numeric AS total_donat,
           array_agg(DISTINCT partid_id ORDER BY partid_id) FILTER (WHERE partid_id IS NOT NULL) AS partide,
           COUNT(*) AS nr_donatii,
           MIN(an) AS prima_an, MAX(an) AS ultima_an
    FROM aep.donatii_pj
    WHERE donator_cui IS NOT NULL
    GROUP BY donator_cui
),
seap_supplier AS (
    -- per-supplier SEAP contract count and awarded value
    SELECT supplier_cui AS cui,
           COUNT(*) AS contracte,
           SUM(awarded_value)::numeric AS valoare_seap
    FROM seap.announcements
    WHERE supplier_cui IS NOT NULL AND awarded_value > 0
    GROUP BY supplier_cui
),
ranked AS (
    SELECT d.cui, d.period_label, d.debt_total::numeric AS debt_total,
           a.total_donat, a.partide, a.nr_donatii, a.prima_an, a.ultima_an,
           e.name, e.adr_judet,
           COALESCE(s.contracte, 0) AS contracte_seap,
           COALESCE(s.valoare_seap, 0)::numeric AS valoare_seap,
           ROW_NUMBER() OVER (ORDER BY d.debt_total DESC NULLS LAST) AS rn
    FROM aep_agg a
    JOIN anaf.datornici_latest d ON d.cui = a.cui
    LEFT JOIN firms.entities e ON e.cui = a.cui
    LEFT JOIN seap_supplier s ON s.cui = a.cui
)
SELECT 'donatori-politici-care-datoreaza-statului', rn::smallint,
       COALESCE(name, 'CUI ' || cui),
       '/achizitii/firma/' || cui,
       'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END || ' · partide: ' || array_to_string(COALESCE(partide, ARRAY[]::text[]), ', ') || ' (' || prima_an || '-' || ultima_an || ')',
       public_kpi.fmt_ron(debt_total) || ' RON datorie',
       debt_total,
       '🗳️ donat ' || public_kpi.fmt_ron(COALESCE(total_donat,0)) || ' RON · 🚨 datornic ' || COALESCE(period_label,'') ||
           CASE WHEN contracte_seap > 0 THEN ' · 📜 ' || contracte_seap || ' contracte SEAP (' || public_kpi.fmt_ron(valoare_seap) || ' RON)' ELSE '' END,
       '🚨 donator + datornic', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── donatori-politici-care-contesta-la-cnsc ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH aep_agg AS (
SELECT donator_cui AS cui,
SUM(suma_lei)::numeric AS total_donat,
array_agg(DISTINCT partid_id ORDER BY partid_id) FILTER (WHERE partid_id IS NOT NULL) AS partide,
COUNT(*) AS nr_donatii,
MAX(an) AS ultima_an
FROM aep.donatii_pj
WHERE donator_cui IS NOT NULL
GROUP BY donator_cui
),
seap AS (
SELECT supplier_cui AS cui,
COUNT(*) AS contracte,
SUM(awarded_value)::numeric AS valoare_seap
FROM seap.announcements
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
GROUP BY supplier_cui
),
ranked AS (
SELECT a.cui, e.name, e.adr_judet,
a.total_donat, a.partide, a.ultima_an,
c.contestations_filed,
COALESCE(s.contracte, 0) AS contracte_seap,
COALESCE(s.valoare_seap, 0)::numeric AS valoare_seap,
ROW_NUMBER() OVER (ORDER BY (a.total_donat * c.contestations_filed) DESC NULLS LAST) AS rn
FROM aep_agg a
JOIN cnsc.mv_per_contestator_cui c ON c.cui = a.cui
LEFT JOIN firms.entities e ON e.cui = a.cui
LEFT JOIN seap s ON s.cui = a.cui
)
SELECT 'donatori-politici-care-contesta-la-cnsc', rn::smallint,
COALESCE(name, 'CUI ' || cui),
'/achizitii/firma/' || cui,
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END || ' · partide: ' || array_to_string(COALESCE(partide, ARRAY[]::text[]), ', ') || ' (ultima ' || ultima_an || ')',
public_kpi.fmt_ron(total_donat) || ' × ' || contestations_filed,
(total_donat * contestations_filed)::numeric,
'🗳️ donat ' || public_kpi.fmt_ron(total_donat) || ' RON · ⚖️ ' || contestations_filed || ' contestații CNSC' ||
CASE WHEN contracte_seap > 0 THEN ' · 📜 ' || contracte_seap || ' contracte SEAP (' || public_kpi.fmt_ron(valoare_seap) || ' RON)' ELSE '' END,
'🗳️⚖️ donator + contestator', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── energie-licentiati-anre-datornici-anaf ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH seap AS (
SELECT supplier_cui AS cui,
COUNT(*) AS contracte,
SUM(awarded_value)::numeric AS valoare_seap
FROM seap.announcements
WHERE supplier_cui IS NOT NULL AND awarded_value > 0
GROUP BY supplier_cui
),
ranked AS (
SELECT a.cui, e.name, e.adr_judet,
a.nr_active, a.nr_expirate, a.surse,
d.debt_total::numeric AS debt_total, d.period_label,
COALESCE(s.contracte, 0) AS contracte_seap,
COALESCE(s.valoare_seap, 0)::numeric AS valoare_seap,
ROW_NUMBER() OVER (ORDER BY d.debt_total DESC NULLS LAST) AS rn
FROM anre.mv_licente_per_cui a
JOIN anaf.datornici_latest d ON d.cui = a.cui
LEFT JOIN firms.entities e ON e.cui = a.cui
LEFT JOIN seap s ON s.cui = a.cui
WHERE a.nr_active > 0
)
SELECT 'energie-licentiati-anre-datornici-anaf', rn::smallint,
COALESCE(name, 'CUI ' || cui),
'/achizitii/firma/' || cui,
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END || ' · 🔌 ' || nr_active || ' licențe active (' || array_to_string(COALESCE(surse, ARRAY[]::text[]), '/') || ')' || CASE WHEN nr_expirate > 0 THEN ', ' || nr_expirate || ' expirate' ELSE '' END,
public_kpi.fmt_ron(debt_total) || ' RON datorie',
debt_total,
'🚨 datornic ' || COALESCE(period_label, '') ||
CASE WHEN contracte_seap > 0 THEN ' · 📜 ' || contracte_seap || ' contracte SEAP (' || public_kpi.fmt_ron(valoare_seap) || ' RON)' ELSE '' END,
'🚨 ANRE + datornic', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── autoritati-dubla-alerta-cdc-cnsc ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH cdc AS (
SELECT audited_entity_cui AS cui,
COUNT(*) AS audit_count,
array_agg(DISTINCT audit_type) FILTER (WHERE audit_type IS NOT NULL) AS audit_types
FROM curteacont.rapoarte
WHERE audited_entity_cui IS NOT NULL
AND publication_date >= now() - interval '5 years'
GROUP BY audited_entity_cui
HAVING COUNT(*) >= 2
),
cnsc_ AS (
SELECT cui, contestation_count
FROM cnsc.mv_per_authority_cui
WHERE contestation_count >= 3
),
seap AS (
SELECT authority_cui AS cui,
COUNT(*) AS proceduri,
SUM(COALESCE(awarded_value, estimated_value))::numeric AS valoare
FROM seap.announcements
WHERE authority_cui IS NOT NULL
GROUP BY authority_cui
),
ranked AS (
SELECT a.cui, e.name, e.adr_judet,
a.audit_count, a.audit_types,
c.contestation_count,
(a.audit_count + c.contestation_count) AS total_signals,
COALESCE(s.proceduri, 0) AS proceduri,
COALESCE(s.valoare, 0)::numeric AS valoare,
ROW_NUMBER() OVER (ORDER BY (a.audit_count + c.contestation_count) DESC, c.contestation_count DESC) AS rn
FROM cdc a
JOIN cnsc_ c ON c.cui = a.cui
LEFT JOIN firms.entities e ON e.cui = a.cui
LEFT JOIN seap s ON s.cui = a.cui
)
SELECT 'autoritati-dubla-alerta-cdc-cnsc', rn::smallint,
COALESCE(name, 'CUI ' || cui),
'/achizitii/autoritate/' || cui,
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
total_signals || ' semnale',
total_signals::numeric,
'📋 ' || audit_count || ' audituri (' || array_to_string(COALESCE(audit_types, ARRAY[]::text[]), '/') || ') · ⚖️ ' || contestation_count || ' contestații CNSC · ' || proceduri || ' proceduri SEAP · ' || public_kpi.fmt_ron(COALESCE(valoare,0)) || ' RON',
'🚨 dublă alertă', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── autoritati-contestate-cnsc ─────
-- Top 5 authorities by CNSC contestation count, with their SEAP procedure
-- count and awarded value attached.
-- Fix: the first/last contestation years in the detail line were concatenated
-- with an empty separator (e.g. "20182024"); they are now joined with '-'.
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH ranked AS (
    SELECT m.cui, m.contestation_count,
           m.first_contestation_date, m.last_contestation_date,
           e.name, e.adr_judet,
           COALESCE(s.proceduri, 0) AS proceduri,
           COALESCE(s.valoare, 0)::numeric AS valoare,
           ROW_NUMBER() OVER (ORDER BY m.contestation_count DESC) AS rn
    FROM cnsc.mv_per_authority_cui m
    LEFT JOIN firms.entities e ON e.cui = m.cui
    LEFT JOIN (
        -- per-authority SEAP procedure count and awarded value
        SELECT authority_cui,
               COUNT(*) AS proceduri,
               SUM(awarded_value)::numeric AS valoare
        FROM seap.announcements
        WHERE authority_cui IS NOT NULL
        GROUP BY authority_cui
    ) s ON s.authority_cui = m.cui
)
SELECT 'autoritati-contestate-cnsc', rn::smallint,
       COALESCE(name, 'CUI ' || cui),
       '/achizitii/autoritate/' || cui,
       'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
       contestation_count || ' contestații',
       contestation_count::numeric,
       proceduri || ' proceduri SEAP · ' || public_kpi.fmt_ron(COALESCE(valoare,0)) || ' RON · contestații ' ||
           COALESCE(extract(year FROM first_contestation_date)::text, '?') || '-' ||
           COALESCE(extract(year FROM last_contestation_date)::text, '?'),
       '⚖️ CNSC', 'warn'
FROM ranked WHERE rn <= 5;
-- ───── energie-fara-licenta ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH energy_seap AS (
SELECT a.supplier_cui,
COUNT(*) AS contracte,
SUM(a.awarded_value)::numeric AS valoare,
COUNT(DISTINCT a.authority_cui) AS autoritati,
array_agg(DISTINCT substr(a.cpv_code, 1, 4)) AS cpv_prefs
FROM seap.announcements a
WHERE a.supplier_cui IS NOT NULL
AND a.awarded_value > 0
AND a.cpv_code LIKE '09%'
GROUP BY a.supplier_cui
),
ranked AS (
SELECT s.supplier_cui AS cui, e.name, e.adr_judet,
s.contracte, s.valoare, s.autoritati, s.cpv_prefs,
ROW_NUMBER() OVER (ORDER BY s.valoare DESC NULLS LAST) AS rn
FROM energy_seap s
LEFT JOIN anre.mv_licente_per_cui m ON m.cui = s.supplier_cui
JOIN firms.entities e ON e.cui = s.supplier_cui
WHERE m.cui IS NULL
)
SELECT 'energie-fara-licenta', rn::smallint,
COALESCE(name, 'CUI ' || cui),
'/achizitii/firma/' || cui,
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
public_kpi.fmt_ron(valoare) || ' RON',
valoare,
contracte || ' contracte · ' || autoritati || ' autorități · CPV ' || array_to_string((COALESCE(cpv_prefs, ARRAY[]::text[]))[1:5], ', '),
'fără licență ANRE', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── telco-fara-licenta ─────
INSERT INTO public_kpi.red_flags_previews
(slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH telco_seap AS (
SELECT a.supplier_cui,
COUNT(*) AS contracte,
SUM(a.awarded_value)::numeric AS valoare,
COUNT(DISTINCT a.authority_cui) AS autoritati,
array_agg(DISTINCT substr(a.cpv_code, 1, 4)) AS cpv_prefs
FROM seap.announcements a
WHERE a.supplier_cui IS NOT NULL
AND a.awarded_value > 0
AND a.cpv_code LIKE '64%'
GROUP BY a.supplier_cui
),
ranked AS (
SELECT s.supplier_cui AS cui, e.name, e.adr_judet,
s.contracte, s.valoare, s.autoritati, s.cpv_prefs,
ROW_NUMBER() OVER (ORDER BY s.valoare DESC NULLS LAST) AS rn
FROM telco_seap s
LEFT JOIN ancom.mv_operatori_per_cui m ON m.cui = s.supplier_cui
JOIN firms.entities e ON e.cui = s.supplier_cui
WHERE m.cui IS NULL
)
SELECT 'telco-fara-licenta', rn::smallint,
COALESCE(name, 'CUI ' || cui),
'/achizitii/firma/' || cui,
'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
public_kpi.fmt_ron(valoare) || ' RON',
valoare,
contracte || ' contracte · ' || autoritati || ' autorități · CPV ' || array_to_string((COALESCE(cpv_prefs, ARRAY[]::text[]))[1:5], ', '),
'fără autorizare ANCOM', 'risk'
FROM ranked WHERE rn <= 5;
-- ───── stat-actionar-seap ─────
-- Top 5 companies listed in aaas.mv_per_cui that also won SEAP contracts,
-- ranked by their total awarded SEAP value.
INSERT INTO public_kpi.red_flags_previews (
    slug, position, primary_text, primary_href, secondary,
    metric, metric_raw, detail, badge_label, badge_tone
)
WITH seap_totals AS (
    -- Per-supplier SEAP totals; only announcements with a positive awarded value count.
    SELECT ann.supplier_cui                  AS cui,
           COUNT(*)                          AS contracte,
           SUM(ann.awarded_value)::numeric   AS total_lei,
           COUNT(DISTINCT ann.authority_cui) AS autoritati
    FROM seap.announcements AS ann
    WHERE ann.awarded_value > 0
      AND ann.supplier_cui IS NOT NULL
    GROUP BY ann.supplier_cui
),
top_ranked AS (
    -- Firm name comes from firms.entities when available, otherwise from aaas.firme.
    SELECT st.cui,
           COALESCE(ent.name, af.name) AS firma,
           ent.adr_judet               AS judet,
           st.max_state_share_pct,
           st.total_debt_to_state_lei,
           st.statusuri,
           tot.contracte,
           tot.total_lei,
           tot.autoritati,
           ROW_NUMBER() OVER (ORDER BY tot.total_lei DESC NULLS LAST) AS rn
    FROM aaas.mv_per_cui AS st
    INNER JOIN seap_totals AS tot ON tot.cui = st.cui
    LEFT JOIN firms.entities AS ent ON ent.cui = st.cui
    LEFT JOIN aaas.firme AS af ON af.cui = st.cui
)
SELECT 'stat-actionar-seap',
       rn::smallint,
       COALESCE(firma, 'CUI ' || cui),
       '/achizitii/firma/' || cui,
       -- NULL-propagating concatenation: a missing county or share falls back
       -- to '' / '?' respectively.
       'CUI ' || cui
           || COALESCE(' · ' || judet, '')
           || ' · stat '
           || COALESCE(to_char(max_state_share_pct, 'FM999990.0') || '%', '?')
           || ' · '
           || array_to_string(COALESCE(statusuri, ARRAY[]::text[]), ', '),
       public_kpi.fmt_ron(total_lei) || ' RON',
       total_lei,
       contracte || ' contracte la ' || autoritati || ' autorități'
           || CASE
                  -- A NULL debt fails the comparison and lands in ELSE.
                  WHEN total_debt_to_state_lei > 0
                      THEN ' · datorii la stat ' || public_kpi.fmt_ron(total_debt_to_state_lei) || ' RON'
                  ELSE ''
              END,
       '🏛️ stat→stat',
       'risk'
FROM top_ranked
WHERE rn <= 5;
-- ───── firme-cu-ajutor-de-stat-si-seap ─────
-- Top 5 firms that appear both in regas.ajutoare and as SEAP suppliers,
-- ranked by (total aid + total awarded SEAP value).
INSERT INTO public_kpi.red_flags_previews (
    slug, position, primary_text, primary_href, secondary,
    metric, metric_raw, detail, badge_label, badge_tone
)
WITH aid_totals AS (
    -- Per-CUI aggregates over regas.ajutoare.
    SELECT aj.cui,
           COUNT(*)                            AS nr_ajutoare,
           SUM(aj.ajutor_acordat_subcategorie) AS total_ajutor,
           COUNT(DISTINCT aj.id_masura)        AS nr_masuri,
           COUNT(DISTINCT aj.finantator)       AS nr_finantatori
    FROM regas.ajutoare AS aj
    WHERE aj.cui IS NOT NULL
    GROUP BY aj.cui
),
seap_totals AS (
    -- Per-supplier SEAP aggregates; only positive awarded values count.
    SELECT ann.supplier_cui                  AS cui,
           COUNT(*)                          AS contracte_seap,
           SUM(ann.awarded_value)            AS valoare_seap,
           COUNT(DISTINCT ann.authority_cui) AS autoritati
    FROM seap.announcements AS ann
    WHERE ann.awarded_value > 0
      AND ann.supplier_cui IS NOT NULL
    GROUP BY ann.supplier_cui
),
top_ranked AS (
    -- Inner joins: a firm must exist in all three sources to be listed.
    SELECT aid.cui,
           ent.name,
           ent.adr_judet,
           aid.nr_ajutoare,
           aid.total_ajutor::numeric AS total_ajutor,
           aid.nr_finantatori,
           sp.contracte_seap,
           sp.valoare_seap::numeric  AS valoare_seap,
           sp.autoritati,
           (COALESCE(aid.total_ajutor,0) + COALESCE(sp.valoare_seap,0))::numeric AS total_combined,
           ROW_NUMBER() OVER (
               ORDER BY (COALESCE(aid.total_ajutor,0) + COALESCE(sp.valoare_seap,0)) DESC NULLS LAST
           ) AS rn
    FROM aid_totals AS aid
    INNER JOIN seap_totals AS sp ON sp.cui = aid.cui
    INNER JOIN firms.entities AS ent ON ent.cui = aid.cui
    WHERE COALESCE(aid.total_ajutor,0) + COALESCE(sp.valoare_seap,0) > 0
)
SELECT 'firme-cu-ajutor-de-stat-si-seap',
       rn::smallint,
       COALESCE(name, 'CUI ' || cui),
       '/achizitii/firma/' || cui,
       -- A NULL county makes the inner concatenation NULL, so nothing is appended.
       'CUI ' || cui || COALESCE(' · ' || adr_judet, ''),
       public_kpi.fmt_ron(total_combined) || ' RON',
       total_combined,
       'Ajutor stat: ' || nr_ajutoare || ' ajutoare / '
           || public_kpi.fmt_ron(COALESCE(total_ajutor,0)) || ' RON la '
           || nr_finantatori || ' finanțatori · SEAP: '
           || contracte_seap || ' contracte / '
           || public_kpi.fmt_ron(COALESCE(valoare_seap,0)) || ' RON la '
           || autoritati || ' autorități',
       'Ajutor + SEAP',
       'warn'
FROM top_ranked
WHERE rn <= 5;
-- ───── firme-cu-fonduri-eu-si-seap ─────
-- Top 5 firms that appear both as beneficiaries in fonduri.beneficiar_anunt
-- and as SEAP suppliers, ranked by (lot budget total + awarded SEAP value).
INSERT INTO public_kpi.red_flags_previews
    (slug, position, primary_text, primary_href, secondary, metric, metric_raw, detail, badge_label, badge_tone)
WITH ben_amount AS (
    SELECT b.cui,
           -- The join to lots yields one row per lot, so COUNT(*) would count
           -- lots, not announcements. Count distinct announcement ids instead
           -- so the "anunțuri" figure in `detail` is not inflated.
           COUNT(DISTINCT b.id) AS anunturi_eu,
           SUM(l.buget_lei)     AS buget_eu
    FROM fonduri.beneficiar_anunt b
    LEFT JOIN fonduri.beneficiar_anunt_lot l ON l.anunt_id = b.id
    WHERE b.cui IS NOT NULL
    GROUP BY b.cui
),
seap_amount AS (
    -- Per-supplier SEAP aggregates; only positive awarded values count.
    SELECT supplier_cui                  AS cui,
           COUNT(*)                      AS contracte_seap,
           SUM(awarded_value)            AS valoare_seap,
           COUNT(DISTINCT authority_cui) AS autoritati
    FROM seap.announcements
    WHERE supplier_cui IS NOT NULL AND awarded_value > 0
    GROUP BY supplier_cui
),
ranked AS (
    -- Inner joins: a firm must exist in all three sources to be listed.
    SELECT b.cui, e.name, e.adr_judet,
           b.anunturi_eu, b.buget_eu::numeric AS buget_eu,
           s.contracte_seap, s.valoare_seap::numeric AS valoare_seap, s.autoritati,
           (COALESCE(b.buget_eu,0) + COALESCE(s.valoare_seap,0))::numeric AS total_combined,
           ROW_NUMBER() OVER (ORDER BY (COALESCE(b.buget_eu,0) + COALESCE(s.valoare_seap,0)) DESC NULLS LAST) AS rn
    FROM ben_amount b
    JOIN seap_amount s ON s.cui = b.cui
    JOIN firms.entities e ON e.cui = b.cui
    WHERE COALESCE(b.buget_eu,0) + COALESCE(s.valoare_seap,0) > 0
)
SELECT 'firme-cu-fonduri-eu-si-seap', rn::smallint,
       COALESCE(name, 'CUI ' || cui),
       '/achizitii/firma/' || cui,
       'CUI ' || cui || CASE WHEN adr_judet IS NOT NULL THEN ' · ' || adr_judet ELSE '' END,
       public_kpi.fmt_ron(total_combined) || ' RON',
       total_combined,
       'EU: ' || anunturi_eu || ' anunțuri / ' || public_kpi.fmt_ron(COALESCE(buget_eu,0)) || ' RON · SEAP: ' || contracte_seap || ' contracte / ' || public_kpi.fmt_ron(COALESCE(valoare_seap,0)) || ' RON la ' || autoritati || ' autorități',
       'EU + SEAP', 'warn'
FROM ranked WHERE rn <= 5;
END;
$$ LANGUAGE plpgsql;
-- Initial populate, then list how many preview rows each slug received.
SELECT public_kpi.refresh_red_flags_previews();

SELECT
    slug,
    COUNT(*) AS rows
FROM public_kpi.red_flags_previews
GROUP BY slug
ORDER BY slug;
@@ -0,0 +1,22 @@
-- 045: composite index on seap.announcements (publication_date DESC NULLS LAST, id DESC)
--
-- Problem: /achizitii/cauta with no filters (home browse) was slow (~3s on
-- empty q). The page does `ORDER BY a.publication_date DESC NULLS LAST,
-- a.id DESC LIMIT 30`, which couldn't use the existing
-- `idx_ann_pub_date btree (publication_date)` because:
--   - Default btree is ASC NULLS LAST; scanned backwards it yields DESC NULLS
--     FIRST, which does not match the query's DESC NULLS LAST
-- - Secondary sort `id DESC` requires composite
-- Planner fell back to Seq Scan + top-N sort over 781K rows (~1.5s just
-- for the main result query, plus seq-scan in facet aggregates).
--
-- Fix: composite index matching the exact ORDER BY clause direction.
-- Now Index Only Scan + early LIMIT termination → ~1ms.
--
-- Measurements (production, 781K rows):
-- Before: 1543ms main query, 550ms count, ~3s total /cauta wall time
-- After: 0.7ms main query, ~5ms with LEFT JOIN to cpv_codes/cui_location
--
-- Idempotent: IF NOT EXISTS. CONCURRENTLY builds the index without blocking
-- writes to the table; it must run outside a transaction block.
-- NOTE(review): an interrupted CONCURRENTLY build leaves an INVALID index
-- behind, and IF NOT EXISTS will then skip it; check pg_index.indisvalid
-- after a failed run.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_ann_pub_date_desc_nl
    ON seap.announcements USING btree (
        publication_date DESC NULLS LAST,
        id DESC
    );
@@ -0,0 +1,140 @@
-- 046: snapshot of /achizitii/cauta no-filter facet aggregates.
--
-- Problem: /cauta home (no filters) still ~1.9s after sql/045's pub_date
-- index fix. Main query is now ~5ms but 6 parallel facet aggregates each
-- do full-table scans:
-- - count(*) GROUP BY type (~200ms)
-- - count(*) GROUP BY county_code (~200ms)
-- - count(*) GROUP BY cpv_division (~200ms)
-- - count(*) GROUP BY procedure_type (~200ms)
-- - count(*) GROUP BY procedure_state (~200ms)
-- - count(*) WHERE awarded_value bucket (~200ms)
--
-- Fix: materialize snapshot tables holding all default-facet counts (plus a
-- single-row totals table). Search code short-circuits to read from the
-- snapshot when filters are empty.
--
-- Helps only the no-filter case; any active filter still does live
-- aggregates. That's intentional: filter combinations are exponentially
-- many (cannot pre-materialize) and selective filters keep aggregates
-- fast anyway.
BEGIN;

-- Snapshot of the default (no-filter) facet counts: one row per facet value.
CREATE TABLE IF NOT EXISTS public_kpi.cauta_default_facets (
    facet_name   text        NOT NULL,                -- facet group, e.g. 'type', 'county', 'cpv'
    key          text        NOT NULL,                -- facet value within the group
    label        text,                                -- display label (may be NULL)
    emoji        text,                                -- optional icon
    count        bigint      NOT NULL,                -- announcements carrying this value
    sort_order   integer     NOT NULL DEFAULT 0,      -- display position within the facet
    computed_at  timestamptz NOT NULL DEFAULT now(),  -- when the row was written
    PRIMARY KEY (facet_name, key)
);
-- Single-row snapshot of the overall totals; the CHECK pins id to 1 so the
-- table can never hold more than one row.
CREATE TABLE IF NOT EXISTS public_kpi.cauta_default_totals (
    id           smallint    DEFAULT 1 PRIMARY KEY CHECK (id = 1),
    total        bigint      NOT NULL,                -- row count of seap.announcements
    sum_awarded  numeric     NOT NULL,                -- sum of awarded_value (0 when none)
    computed_at  timestamptz NOT NULL DEFAULT now()   -- when the snapshot was taken
);
-- Rebuilds the no-filter snapshot: upserts the single totals row, then wipes
-- and re-populates public_kpi.cauta_default_facets from seap.announcements.
-- Takes no arguments and returns nothing.
CREATE OR REPLACE FUNCTION public_kpi.refresh_cauta_defaults()
RETURNS void
LANGUAGE plpgsql
AS $$
BEGIN
    -- Totals (single row, idempotent UPSERT)
    INSERT INTO public_kpi.cauta_default_totals (id, total, sum_awarded, computed_at)
    SELECT 1, count(*), COALESCE(sum(awarded_value), 0), now()
    FROM seap.announcements
    ON CONFLICT (id) DO UPDATE SET
        total       = EXCLUDED.total,
        sum_awarded = EXCLUDED.sum_awarded,
        computed_at = EXCLUDED.computed_at;

    -- Intentional whole-table wipe, then re-populate. The delete and the
    -- inserts below share one transaction, so readers see a consistent
    -- state during refresh. DELETE rather than TRUNCATE: TRUNCATE is not
    -- MVCC-safe, so concurrent readers could observe an empty table.
    DELETE FROM public_kpi.cauta_default_facets;

    -- Every top-N facet below orders by (count DESC, key) in BOTH the
    -- ROW_NUMBER() window and the outer ORDER BY. With count alone, ties at
    -- the LIMIT boundary could keep a row whose sort_order exceeds the limit
    -- and make the snapshot differ between runs.

    -- types (top 12). NULL types are skipped like in every other facet:
    -- `key` is NOT NULL, so a NULL group would abort the whole refresh.
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'type', type, type, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, type)
    FROM seap.announcements
    WHERE type IS NOT NULL
    GROUP BY type
    ORDER BY count(*) DESC, type
    LIMIT 12;

    -- counties (top 20)
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'county', county_code, county_code, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, county_code)
    FROM seap.announcements
    WHERE county_code IS NOT NULL
    GROUP BY county_code
    ORDER BY count(*) DESC, county_code
    LIMIT 20;

    -- cpv divisions (top 15, with label + emoji from cpv_codes)
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, emoji, count, sort_order)
    SELECT 'cpv', a.cpv_division, c.name_ro, c.emoji, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, a.cpv_division)
    FROM seap.announcements a
    LEFT JOIN seap.cpv_codes c ON c.code = a.cpv_division
    WHERE a.cpv_division IS NOT NULL
    GROUP BY a.cpv_division, c.name_ro, c.emoji
    ORDER BY count(*) DESC, a.cpv_division
    LIMIT 15;

    -- procedure types (top 10)
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'procedure', procedure_type, procedure_type, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, procedure_type)
    FROM seap.announcements
    WHERE procedure_type IS NOT NULL
    GROUP BY procedure_type
    ORDER BY count(*) DESC, procedure_type
    LIMIT 10;

    -- procedure states (top 8)
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'state', procedure_state, procedure_state, count(*),
           ROW_NUMBER() OVER (ORDER BY count(*) DESC, procedure_state)
    FROM seap.announcements
    WHERE procedure_state IS NOT NULL
    GROUP BY procedure_state
    ORDER BY count(*) DESC, procedure_state
    LIMIT 8;

    -- value buckets (5 buckets + "fără valoare"); sort_order follows the
    -- bucket's numeric range rather than its count.
    INSERT INTO public_kpi.cauta_default_facets (facet_name, key, label, count, sort_order)
    SELECT 'value', bucket, bucket, count(*),
           CASE bucket
               WHEN 'sub 100K'     THEN 1
               WHEN '100K 1M'      THEN 2
               WHEN '1M 10M'       THEN 3
               WHEN '10M 100M'     THEN 4
               WHEN 'peste 100M'   THEN 5
               WHEN 'fără valoare' THEN 6
           END
    FROM (
        -- NULL and 0 both mean "no value"; upper bounds are exclusive.
        SELECT CASE
                   WHEN awarded_value IS NULL OR awarded_value = 0 THEN 'fără valoare'
                   WHEN awarded_value < 100000    THEN 'sub 100K'
                   WHEN awarded_value < 1000000   THEN '100K 1M'
                   WHEN awarded_value < 10000000  THEN '1M 10M'
                   WHEN awarded_value < 100000000 THEN '10M 100M'
                   ELSE 'peste 100M'
               END AS bucket
        FROM seap.announcements
    ) b
    GROUP BY bucket;
END;
$$;
COMMIT;
-- Initial population: run once, after the transaction above has committed,
-- so the snapshot tables are filled as soon as they exist.
SELECT public_kpi.refresh_cauta_defaults();
@@ -0,0 +1,18 @@
-- 047: matching companion to sql/045 — awarded_value DESC NULLS LAST index.
--
-- Problem: /achizitii/cauta?sort=value_desc was ~3.5s. Same root cause as
-- sql/045: existing `idx_ann_value btree (awarded_value)` defaults to
-- ASC NULLS LAST, so ORDER BY awarded_value DESC NULLS LAST cannot use it
-- (NULLS ordering mismatch). Planner did Seq Scan + top-N heap sort over
-- 781K rows.
--
-- Fix: index whose sort direction and NULLS placement match the exact
-- ORDER BY clause.
--
-- Measurements (production):
-- Before: 1284ms (Seq Scan + Sort)
-- After: 0.4ms (Index Scan + LIMIT)
--
-- Existing idx_ann_value is kept for the value_asc sort case.
-- CONCURRENTLY cannot run inside a transaction block, so this statement
-- stands alone in the file.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_ann_value_desc_nl
    ON seap.announcements USING btree (
        awarded_value DESC NULLS LAST
    );