initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
@@ -0,0 +1,235 @@
|
||||
-- 036_apia.sql
|
||||
-- APIA — Agenția de Plăți și Intervenție pentru Agricultură.
|
||||
-- "Lista fermierilor" — annual list of farmers receiving direct payments
|
||||
-- (subvenții) per UAT campaign. Covers ~each commune that publishes a list
|
||||
-- via data.gov.ro (currently 2024 only; one comuna live, more on the way).
|
||||
--
|
||||
-- Sources investigated 2026-05-10:
|
||||
-- 1. data.gov.ro CKAN — only ONE published "Lista fermieri APIA" XLSX
|
||||
-- lives at /api/3/action/package_show?id=lista-fermierilor-campania-apia-2024
|
||||
-- (single resource: comuna Găgești, jud. Vaslui, 192 farmers).
|
||||
-- The schema is per-comuna so future ingests over the same package
|
||||
-- will multiply rows linearly.
|
||||
-- 2. https://www.apia.org.ro/ — bot-blocked (HTTP 403 from non-browser
|
||||
-- User-Agents). National-level lists exist on APIA's site but require
|
||||
-- JS / browser session to retrieve. Out of scope for this pass.
|
||||
-- 3. AFIR FEGA dump (fonduri.afir_plati WHERE tip_fond='FEGA', 4.29M rows
|
||||
-- for 2023+2024) is the *closest* national equivalent — it contains
|
||||
-- payment amounts but no SUPRAFATA (hectares) and no
|
||||
-- RESPONSABIL UAT / CENTRUL APIA fields.
|
||||
--
|
||||
-- This schema is therefore intentionally narrow but extensible:
|
||||
-- - one row per (campaign_year, name, comuna, sat) — natural composite key
|
||||
-- - source_dataset_id + source_resource_id on every row → idempotent re-ingest
|
||||
-- - cui populated only when the row is a legal person (SC ... SRL / PFA);
|
||||
-- for natural persons (CNP-keyed in source) cui stays NULL
|
||||
--
|
||||
-- Cross-source value:
|
||||
-- apia.fermieri.cui × fonduri.afir_plati(tip_fond='FEGA').cui
|
||||
-- = "Fermier in lista APIA care apare ȘI in plățile FEGA AFIR" — sanity
|
||||
-- check duplicate-receipt audit. APIA list shows hectares declared,
|
||||
-- FEGA shows EUR plătiți; ratio EUR/ha → outliers.
|
||||
-- apia.fermieri.cui × anaf.datornici.cui
|
||||
-- = "Fermier (PFA/SRL) cu datorii la stat care primește subvenții APIA"
|
||||
-- — direct red flag.
|
||||
-- apia.fermieri.name (PF, no CUI) × ani.declaratii.persoana_name
|
||||
-- = persoane cu funcții publice care primesc subvenții agricole.
|
||||
-- apia.fermieri.cui × seap.announcements.supplier_cui
|
||||
-- = ferme care iau și subvenții și contracte publice.
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS apia;
|
||||
|
||||
-- ── 1. Lista fermieri ─────────────────────────────────────────────────────
|
||||
-- One row per farmer × campaign × comuna × sat. Composite uniqueness chosen
|
||||
-- because data.gov.ro source has no national ID column (no CNP/CUI per row).
|
||||
-- 'name' is raw "NUME PRENUME" string from the published XLSX.
|
||||
CREATE TABLE IF NOT EXISTS apia.fermieri (
|
||||
id bigserial PRIMARY KEY,
|
||||
campaign_year smallint NOT NULL, -- e.g. 2024 (also 2023 SUPRAFATA exists in same row but campaign year is publication year)
|
||||
name text NOT NULL, -- raw "NUME PRENUME" or "SC ... SRL"
|
||||
name_normalized text, -- firms.normalize_company_name(name) — only when looks like PJ
|
||||
cui text, -- only if matched to firms.entities (PJ rows like "SC X SRL")
|
||||
cui_match_method text, -- 'exact_norm' | 'trgm_unique' | NULL
|
||||
cui_match_score numeric(4,3),
|
||||
is_legal_person boolean, -- guessed from name shape (SC, SRL, PFA, II, IF, SA prefixes/suffixes)
|
||||
judet text, -- enriched via centru_apia mapping (Găgești → VS Vaslui)
|
||||
comuna_oras text, -- raw "COMUNA/ORAS" cell
|
||||
sat text,
|
||||
centru_apia text, -- "CENTRUL APIA" (e.g. MURGENI)
|
||||
responsabil_uat text, -- "RESPONSABIL UAT 2024" (the UAT employee, not the farmer)
|
||||
suprafata_ha numeric(12,4), -- "SUPRAFATA 2023" hectares, decimal allowed (e.g. 1.04, 12.45)
|
||||
source_dataset_id text NOT NULL, -- CKAN package_id, e.g. 'lista-fermierilor-campania-apia-2024'
|
||||
source_resource_id text NOT NULL, -- CKAN resource_id (UUID)
|
||||
source_url text NOT NULL, -- direct XLSX download URL
|
||||
fetched_at timestamptz NOT NULL DEFAULT now(),
|
||||
-- NULLS NOT DISTINCT: treat NULL sat as a single value so we don't get
|
||||
-- duplicate rows when source omits sat for some farmers.
|
||||
UNIQUE NULLS NOT DISTINCT (campaign_year, name, comuna_oras, sat)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_cui ON apia.fermieri(cui) WHERE cui IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_year ON apia.fermieri(campaign_year);
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_judet ON apia.fermieri(judet);
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_comuna ON apia.fermieri(comuna_oras);
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_centru ON apia.fermieri(centru_apia);
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_resource ON apia.fermieri(source_resource_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_suprafata ON apia.fermieri(suprafata_ha DESC NULLS LAST);
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_fermieri_name_trgm ON apia.fermieri USING gin (name_normalized gin_trgm_ops);
|
||||
|
||||
COMMENT ON TABLE apia.fermieri IS
|
||||
'Lista fermierilor publicată de APIA prin UAT-uri pe data.gov.ro. '
|
||||
'Sursă primară: CKAN package "lista-fermierilor-campania-apia-2024". '
|
||||
'Cross-source cu fonduri.afir_plati (FEGA) și anaf.datornici.';
|
||||
COMMENT ON COLUMN apia.fermieri.suprafata_ha IS
|
||||
'Hectare declarate — coloana "SUPRAFATA 2023" (precedent campaign).';
|
||||
COMMENT ON COLUMN apia.fermieri.is_legal_person IS
|
||||
'true = nume conține markeri "SC/SRL/SA/PFA/II/IF/CABINET" → potențial match firms.entities.';
|
||||
COMMENT ON COLUMN apia.fermieri.cui_match_method IS
|
||||
'exact_norm = match exact pe firms.normalize_company_name; '
|
||||
'trgm_unique = match trigram unic peste 0.85; NULL = nepotrivit (probabil PF).';
|
||||
|
||||
-- ── 1b. Staging table (used by importer for COPY → INSERT pipeline) ─────
|
||||
CREATE TABLE IF NOT EXISTS apia.staging_fermieri (
|
||||
campaign_year text,
|
||||
name text,
|
||||
comuna_oras text,
|
||||
sat text,
|
||||
centru_apia text,
|
||||
responsabil_uat text,
|
||||
suprafata_ha text,
|
||||
source_dataset_id text,
|
||||
source_resource_id text,
|
||||
source_url text
|
||||
);
|
||||
|
||||
COMMENT ON TABLE apia.staging_fermieri IS
|
||||
'Tabel de staging pentru importul XLSX→COPY. TRUNCATE între import-uri.';
|
||||
|
||||
-- ── 2. Scrape log ─────────────────────────────────────────────────────────
|
||||
-- One row per CKAN-resource ingest. Useful for "ce am importat când" history.
|
||||
CREATE TABLE IF NOT EXISTS apia.scrape_log (
|
||||
id bigserial PRIMARY KEY,
|
||||
source_dataset_id text NOT NULL,
|
||||
source_resource_id text NOT NULL,
|
||||
source_url text NOT NULL,
|
||||
campaign_year smallint NOT NULL,
|
||||
rows_seen integer NOT NULL DEFAULT 0,
|
||||
rows_inserted integer NOT NULL DEFAULT 0,
|
||||
rows_updated integer NOT NULL DEFAULT 0,
|
||||
rows_matched_cui integer NOT NULL DEFAULT 0,
|
||||
duration_ms integer NOT NULL DEFAULT 0,
|
||||
started_at timestamptz NOT NULL,
|
||||
finished_at timestamptz NOT NULL DEFAULT now(),
|
||||
error text
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_scrape_log_started ON apia.scrape_log(started_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_apia_scrape_log_resource ON apia.scrape_log(source_resource_id);
|
||||
|
||||
-- ── 3. Materialized view: per-CUI rollup ─────────────────────────────────
|
||||
-- Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY apia.mv_per_cui;
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS apia.mv_per_cui AS
|
||||
SELECT
|
||||
cui,
|
||||
array_agg(DISTINCT campaign_year ORDER BY campaign_year) AS campaign_years,
|
||||
array_agg(DISTINCT judet) FILTER (WHERE judet IS NOT NULL) AS judete,
|
||||
SUM(suprafata_ha) AS total_suprafata_ha,
|
||||
COUNT(*) AS rows_count,
|
||||
MAX(fetched_at) AS last_seen_at
|
||||
FROM apia.fermieri
|
||||
WHERE cui IS NOT NULL
|
||||
GROUP BY cui;
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_apia_mv_per_cui ON apia.mv_per_cui(cui);
|
||||
|
||||
COMMENT ON MATERIALIZED VIEW apia.mv_per_cui IS
|
||||
'Rollup APIA per CUI (doar PJ-uri cu match). '
|
||||
'Refresh: REFRESH MATERIALIZED VIEW CONCURRENTLY apia.mv_per_cui.';
|
||||
|
||||
-- ── 4. CUI matcher (call after each ingest) ──────────────────────────────
|
||||
-- Populates apia.fermieri.cui by joining name_normalized against
|
||||
-- firms.entities. Conservative: requires *unique* match in firms (no
|
||||
-- ambiguous trgm collisions).
|
||||
CREATE OR REPLACE FUNCTION apia.match_cui()
|
||||
RETURNS TABLE(matched_count bigint, ambiguous_count bigint) AS $$
|
||||
DECLARE
|
||||
v_matched bigint := 0;
|
||||
v_ambiguous bigint := 0;
|
||||
BEGIN
|
||||
-- Heuristic: row is candidate PJ if name has SRL/SA/PFA/II/IF/SC/CABINET.
|
||||
UPDATE apia.fermieri f
|
||||
SET is_legal_person = TRUE,
|
||||
name_normalized = firms.normalize_company_name(f.name)
|
||||
WHERE f.is_legal_person IS NULL
|
||||
AND f.name ~* '\m(SRL|S\.R\.L\.|S\.A\.|SA|PFA|P\.F\.A\.|II|I\.I\.|IF|I\.F\.|SC|S\.C\.|CABINET|COOPERATIVA|COOP)\M';
|
||||
|
||||
-- Exact-norm match
|
||||
WITH cands AS (
|
||||
SELECT f.id, e.cui
|
||||
FROM apia.fermieri f
|
||||
JOIN firms.entities e
|
||||
ON e.name_normalized = f.name_normalized
|
||||
WHERE f.cui IS NULL
|
||||
AND f.is_legal_person = TRUE
|
||||
AND f.name_normalized IS NOT NULL
|
||||
),
|
||||
uniq AS (
|
||||
SELECT id, MIN(cui) AS cui
|
||||
FROM cands
|
||||
GROUP BY id
|
||||
HAVING COUNT(DISTINCT cui) = 1
|
||||
),
|
||||
upd AS (
|
||||
UPDATE apia.fermieri f
|
||||
SET cui = u.cui,
|
||||
cui_match_method = 'exact_norm',
|
||||
cui_match_score = 1.0
|
||||
FROM uniq u
|
||||
WHERE f.id = u.id
|
||||
RETURNING f.id
|
||||
)
|
||||
SELECT COUNT(*) INTO v_matched FROM upd;
|
||||
|
||||
-- Trigram fallback for unmatched PJs (threshold 0.85, must be unique)
|
||||
WITH cands AS (
|
||||
SELECT f.id, e.cui,
|
||||
similarity(e.name_normalized, f.name_normalized) AS sim
|
||||
FROM apia.fermieri f
|
||||
JOIN firms.entities e
|
||||
ON e.name_normalized % f.name_normalized
|
||||
WHERE f.cui IS NULL
|
||||
AND f.is_legal_person = TRUE
|
||||
AND f.name_normalized IS NOT NULL
|
||||
AND similarity(e.name_normalized, f.name_normalized) >= 0.85
|
||||
),
|
||||
ranked AS (
|
||||
SELECT id, cui, sim,
|
||||
COUNT(*) OVER (PARTITION BY id) AS n_cands
|
||||
FROM cands
|
||||
),
|
||||
uniq AS (
|
||||
SELECT DISTINCT ON (id) id, cui, sim
|
||||
FROM ranked
|
||||
WHERE n_cands = 1
|
||||
),
|
||||
upd AS (
|
||||
UPDATE apia.fermieri f
|
||||
SET cui = u.cui,
|
||||
cui_match_method = 'trgm_unique',
|
||||
cui_match_score = u.sim
|
||||
FROM uniq u
|
||||
WHERE f.id = u.id
|
||||
RETURNING f.id
|
||||
)
|
||||
SELECT COUNT(*) INTO v_ambiguous FROM upd;
|
||||
|
||||
matched_count := v_matched;
|
||||
ambiguous_count := v_ambiguous;
|
||||
RETURN NEXT;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
COMMENT ON FUNCTION apia.match_cui() IS
|
||||
'Matchează apia.fermieri.cui prin firms.entities. Întâi exact_norm, apoi '
|
||||
'trgm fallback >=0.85 cu unicitate. Returnează (matched_exact, matched_trgm). '
|
||||
'Doar rândurile marcate is_legal_person.';
|
||||
Reference in New Issue
Block a user