a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
236 lines
11 KiB
PL/PgSQL
236 lines
11 KiB
PL/PgSQL
-- 036_apia.sql
|
||
-- APIA — Agenția de Plăți și Intervenție pentru Agricultură.
|
||
-- "Lista fermierilor" — annual list of farmers receiving direct payments
|
||
-- (subvenții) per UAT campaign. Covers ~each commune that publishes a list
|
||
-- via data.gov.ro (currently 2024 only; one comuna live, more on the way).
|
||
--
|
||
-- Sources investigated 2026-05-10:
|
||
-- 1. data.gov.ro CKAN — only ONE published "Lista fermieri APIA" XLSX
|
||
-- lives at /api/3/action/package_show?id=lista-fermierilor-campania-apia-2024
|
||
-- (single resource: comuna Găgești, jud. Vaslui, 192 farmers).
|
||
-- The schema is per-comuna so future ingests over the same package
|
||
-- will multiply rows linearly.
|
||
-- 2. https://www.apia.org.ro/ — bot-blocked (HTTP 403 from non-browser
|
||
-- User-Agents). National-level lists exist on APIA's site but require
|
||
-- JS / browser session to retrieve. Out of scope for this pass.
|
||
-- 3. AFIR FEGA dump (fonduri.afir_plati WHERE tip_fond='FEGA', 4.29M rows
|
||
-- for 2023+2024) is the *closest* national equivalent — it contains
|
||
-- payment amounts but no SUPRAFATA (hectares) and no
|
||
-- RESPONSABIL UAT / CENTRUL APIA fields.
|
||
--
|
||
-- This schema is therefore intentionally narrow but extensible:
|
||
-- - one row per (campaign_year, name, comuna, sat) — natural composite key
|
||
-- - source_dataset_id + source_resource_id on every row → idempotent re-ingest
|
||
-- - cui populated only when the row is a legal person (SC ... SRL / PFA);
|
||
-- for natural persons (CNP-keyed in source) cui stays NULL
|
||
--
|
||
-- Cross-source value:
|
||
-- apia.fermieri.cui × fonduri.afir_plati(tip_fond='FEGA').cui
|
||
-- = "Fermier in lista APIA care apare ȘI in plățile FEGA AFIR" — sanity
|
||
-- check duplicate-receipt audit. APIA list shows hectares declared,
|
||
-- FEGA shows EUR plătiți; ratio EUR/ha → outliers.
|
||
-- apia.fermieri.cui × anaf.datornici.cui
|
||
-- = "Fermier (PFA/SRL) cu datorii la stat care primește subvenții APIA"
|
||
-- — direct red flag.
|
||
-- apia.fermieri.name (PF, no CUI) × ani.declaratii.persoana_name
|
||
-- = persoane cu funcții publice care primesc subvenții agricole.
|
||
-- apia.fermieri.cui × seap.announcements.supplier_cui
|
||
-- = ferme care iau și subvenții și contracte publice.
|
||
|
||
CREATE SCHEMA IF NOT EXISTS apia;
|
||
|
||
-- ── 1. Lista fermieri ─────────────────────────────────────────────────────
|
||
-- One row per farmer × campaign × comuna × sat. Composite uniqueness chosen
|
||
-- because data.gov.ro source has no national ID column (no CNP/CUI per row).
|
||
-- 'name' is raw "NUME PRENUME" string from the published XLSX.
|
||
CREATE TABLE IF NOT EXISTS apia.fermieri (
|
||
id bigserial PRIMARY KEY,
|
||
campaign_year smallint NOT NULL, -- e.g. 2024 (also 2023 SUPRAFATA exists in same row but campaign year is publication year)
|
||
name text NOT NULL, -- raw "NUME PRENUME" or "SC ... SRL"
|
||
name_normalized text, -- firms.normalize_company_name(name) — only when looks like PJ
|
||
cui text, -- only if matched to firms.entities (PJ rows like "SC X SRL")
|
||
cui_match_method text, -- 'exact_norm' | 'trgm_unique' | NULL
|
||
cui_match_score numeric(4,3),
|
||
is_legal_person boolean, -- guessed from name shape (SC, SRL, PFA, II, IF, SA prefixes/suffixes)
|
||
judet text, -- enriched via centru_apia mapping (Găgești → VS Vaslui)
|
||
comuna_oras text, -- raw "COMUNA/ORAS" cell
|
||
sat text,
|
||
centru_apia text, -- "CENTRUL APIA" (e.g. MURGENI)
|
||
responsabil_uat text, -- "RESPONSABIL UAT 2024" (the UAT employee, not the farmer)
|
||
suprafata_ha numeric(12,4), -- "SUPRAFATA 2023" hectares, decimal allowed (e.g. 1.04, 12.45)
|
||
source_dataset_id text NOT NULL, -- CKAN package_id, e.g. 'lista-fermierilor-campania-apia-2024'
|
||
source_resource_id text NOT NULL, -- CKAN resource_id (UUID)
|
||
source_url text NOT NULL, -- direct XLSX download URL
|
||
fetched_at timestamptz NOT NULL DEFAULT now(),
|
||
-- NULLS NOT DISTINCT: treat NULL sat as a single value so we don't get
|
||
-- duplicate rows when source omits sat for some farmers.
|
||
UNIQUE NULLS NOT DISTINCT (campaign_year, name, comuna_oras, sat)
|
||
);
|
||
|
||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_cui ON apia.fermieri(cui) WHERE cui IS NOT NULL;
|
||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_year ON apia.fermieri(campaign_year);
|
||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_judet ON apia.fermieri(judet);
|
||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_comuna ON apia.fermieri(comuna_oras);
|
||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_centru ON apia.fermieri(centru_apia);
|
||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_resource ON apia.fermieri(source_resource_id);
|
||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_suprafata ON apia.fermieri(suprafata_ha DESC NULLS LAST);
|
||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_name_trgm ON apia.fermieri USING gin (name_normalized gin_trgm_ops);
|
||
|
||
COMMENT ON TABLE apia.fermieri IS
|
||
'Lista fermierilor publicată de APIA prin UAT-uri pe data.gov.ro. '
|
||
'Sursă primară: CKAN package "lista-fermierilor-campania-apia-2024". '
|
||
'Cross-source cu fonduri.afir_plati (FEGA) și anaf.datornici.';
|
||
COMMENT ON COLUMN apia.fermieri.suprafata_ha IS
|
||
'Hectare declarate — coloana "SUPRAFATA 2023" (precedent campaign).';
|
||
COMMENT ON COLUMN apia.fermieri.is_legal_person IS
|
||
'true = nume conține markeri "SC/SRL/SA/PFA/II/IF/CABINET" → potențial match firms.entities.';
|
||
COMMENT ON COLUMN apia.fermieri.cui_match_method IS
|
||
'exact_norm = match exact pe firms.normalize_company_name; '
|
||
'trgm_unique = match trigram unic peste 0.85; NULL = nepotrivit (probabil PF).';
|
||
|
||
-- ── 1b. Staging table (used by importer for COPY → INSERT pipeline) ─────
|
||
CREATE TABLE IF NOT EXISTS apia.staging_fermieri (
|
||
campaign_year text,
|
||
name text,
|
||
comuna_oras text,
|
||
sat text,
|
||
centru_apia text,
|
||
responsabil_uat text,
|
||
suprafata_ha text,
|
||
source_dataset_id text,
|
||
source_resource_id text,
|
||
source_url text
|
||
);
|
||
|
||
COMMENT ON TABLE apia.staging_fermieri IS
|
||
'Tabel de staging pentru importul XLSX→COPY. TRUNCATE între import-uri.';
|
||
|
||
-- ── 2. Scrape log ─────────────────────────────────────────────────────────
|
||
-- One row per CKAN-resource ingest. Useful for "ce am importat când" history.
|
||
CREATE TABLE IF NOT EXISTS apia.scrape_log (
|
||
id bigserial PRIMARY KEY,
|
||
source_dataset_id text NOT NULL,
|
||
source_resource_id text NOT NULL,
|
||
source_url text NOT NULL,
|
||
campaign_year smallint NOT NULL,
|
||
rows_seen integer NOT NULL DEFAULT 0,
|
||
rows_inserted integer NOT NULL DEFAULT 0,
|
||
rows_updated integer NOT NULL DEFAULT 0,
|
||
rows_matched_cui integer NOT NULL DEFAULT 0,
|
||
duration_ms integer NOT NULL DEFAULT 0,
|
||
started_at timestamptz NOT NULL,
|
||
finished_at timestamptz NOT NULL DEFAULT now(),
|
||
error text
|
||
);
|
||
|
||
CREATE INDEX IF NOT EXISTS idx_apia_scrape_log_started ON apia.scrape_log(started_at DESC);
|
||
CREATE INDEX IF NOT EXISTS idx_apia_scrape_log_resource ON apia.scrape_log(source_resource_id);
|
||
|
||
-- ── 3. Materialized view: per-CUI rollup ─────────────────────────────────
|
||
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY apia.mv_per_cui;
|
||
CREATE MATERIALIZED VIEW IF NOT EXISTS apia.mv_per_cui AS
|
||
SELECT
|
||
cui,
|
||
array_agg(DISTINCT campaign_year ORDER BY campaign_year) AS campaign_years,
|
||
array_agg(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS judete,
|
||
SUM(suprafata_ha) AS total_suprafata_ha,
|
||
COUNT(*) AS rows_count,
|
||
MAX(fetched_at) AS last_seen_at
|
||
FROM apia.fermieri
|
||
WHERE cui IS NOT NULL
|
||
GROUP BY cui;
|
||
|
||
CREATE UNIQUE INDEX IF NOT EXISTS idx_apia_mv_per_cui ON apia.mv_per_cui(cui);
|
||
|
||
COMMENT ON MATERIALIZED VIEW apia.mv_per_cui IS
|
||
'Rollup APIA per CUI (doar PJ-uri cu match). '
|
||
'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY apia.mv_per_cui.';
|
||
|
||
-- ── 4. CUI matcher (call after each ingest) ──────────────────────────────
|
||
-- Populates apia.fermieri.cui by joining name_normalized against
|
||
-- firms.entities. Conservative: requires *unique* match in firms (no
|
||
-- ambiguous trgm collisions).
|
||
CREATE OR REPLACE FUNCTION apia.match_cui()
|
||
RETURNS TABLE(matched_count bigint, ambiguous_count bigint) AS $$
|
||
DECLARE
|
||
v_matched bigint := 0;
|
||
v_ambiguous bigint := 0;
|
||
BEGIN
|
||
-- Heuristic: row is candidate PJ if name has SRL/SA/PFA/II/IF/SC/CABINET.
|
||
UPDATE apia.fermieri f
|
||
SET is_legal_person = TRUE,
|
||
name_normalized = firms.normalize_company_name(f.name)
|
||
WHERE f.is_legal_person IS NULL
|
||
AND f.name ~* '\m(SRL|S\.R\.L\.|S\.A\.|SA|PFA|P\.F\.A\.|II|I\.I\.|IF|I\.F\.|SC|S\.C\.|CABINET|COOPERATIVA|COOP)\M';
|
||
|
||
-- Exact-norm match
|
||
WITH cands AS (
|
||
SELECT f.id, e.cui
|
||
FROM apia.fermieri f
|
||
JOIN firms.entities e
|
||
ON e.name_normalized = f.name_normalized
|
||
WHERE f.cui IS NULL
|
||
AND f.is_legal_person = TRUE
|
||
AND f.name_normalized IS NOT NULL
|
||
),
|
||
uniq AS (
|
||
SELECT id, MIN(cui) AS cui
|
||
FROM cands
|
||
GROUP BY id
|
||
HAVING COUNT(DISTINCT cui) = 1
|
||
),
|
||
upd AS (
|
||
UPDATE apia.fermieri f
|
||
SET cui = u.cui,
|
||
cui_match_method = 'exact_norm',
|
||
cui_match_score = 1.0
|
||
FROM uniq u
|
||
WHERE f.id = u.id
|
||
RETURNING f.id
|
||
)
|
||
SELECT COUNT(*) INTO v_matched FROM upd;
|
||
|
||
-- Trigram fallback for unmatched PJs (threshold 0.85, must be unique)
|
||
WITH cands AS (
|
||
SELECT f.id, e.cui,
|
||
similarity(e.name_normalized, f.name_normalized) AS sim
|
||
FROM apia.fermieri f
|
||
JOIN firms.entities e
|
||
ON e.name_normalized % f.name_normalized
|
||
WHERE f.cui IS NULL
|
||
AND f.is_legal_person = TRUE
|
||
AND f.name_normalized IS NOT NULL
|
||
AND similarity(e.name_normalized, f.name_normalized) >= 0.85
|
||
),
|
||
ranked AS (
|
||
SELECT id, cui, sim,
|
||
COUNT(*) OVER (PARTITION BY id) AS n_cands
|
||
FROM cands
|
||
),
|
||
uniq AS (
|
||
SELECT DISTINCT ON (id) id, cui, sim
|
||
FROM ranked
|
||
WHERE n_cands = 1
|
||
),
|
||
upd AS (
|
||
UPDATE apia.fermieri f
|
||
SET cui = u.cui,
|
||
cui_match_method = 'trgm_unique',
|
||
cui_match_score = u.sim
|
||
FROM uniq u
|
||
WHERE f.id = u.id
|
||
RETURNING f.id
|
||
)
|
||
SELECT COUNT(*) INTO v_ambiguous FROM upd;
|
||
|
||
matched_count := v_matched;
|
||
ambiguous_count := v_ambiguous;
|
||
RETURN NEXT;
|
||
END;
|
||
$$ LANGUAGE plpgsql;
|
||
|
||
COMMENT ON FUNCTION apia.match_cui() IS
|
||
'Matchează apia.fermieri.cui prin firms.entities. Întâi exact_norm, apoi '
|
||
'trgm fallback >=0.85 cu unicitate. Returnează (matched_exact, matched_trgm). '
|
||
'Doar rândurile marcate is_legal_person.';
|