a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
212 lines
11 KiB
SQL
212 lines
11 KiB
SQL
-- 030_ani_schema.sql
|
||
-- ANI declarații de avere și interese — flagship transparency feature.
|
||
--
|
||
-- Source: declaratii.integritate.eu (e-DAI 2022→), old-declaratii.integritate.eu
|
||
-- (archive 2008-2022). Public by Law 176/2010, GDPR-safe (no CNP stored).
|
||
--
|
||
-- ~1.3M PDF declarations of Romanian public officials. Cross-references
|
||
-- politicians × firms-they-own × procurement-contracts (firms.entities, seap.*).
|
||
--
|
||
-- See ANI-PLAN.md for full architecture, volume estimates, and rollout plan.
|
||
-- This file = Stage 0 (schema only, no data).
|
||
|
||
-- pg_trgm provides the gin_trgm_ops operator class used by the trigram
-- (fuzzy-search) indexes created further down in this file. Ensuring it here
-- keeps the migration runnable on a fresh database; it is a no-op (NOTICE
-- only) when the extension is already installed.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Namespace for all ANI declaration tables; USAGE is granted to every role.
-- Note that USAGE alone does not grant SELECT on the tables inside the schema.
CREATE SCHEMA IF NOT EXISTS ani;
GRANT USAGE ON SCHEMA ani TO PUBLIC;
|
||
|
||
|
||
-- ── ani.officials ──────────────────────────────────────────────────────────
-- One row per distinct public official (demnitar / funcționar public).
-- Populated by Stage 4 (entity resolution) rather than by the listing
-- scraper, so ani.declaratii.official_id remains NULL until dedup has run.
CREATE TABLE IF NOT EXISTS ani.officials (
    id bigserial PRIMARY KEY,

    -- Matching key: lower(unaccent(name)), collapsed.
    normalized_name text NOT NULL,
    -- Human-readable form, e.g. "Popescu Ioan-Vasile".
    display_name text NOT NULL,
    -- SHA-256 of the CNP when it can be extracted (rare).
    cnp_hash char(64),

    -- min / max declaration year seen for this official.
    first_seen_year smallint,
    last_seen_year smallint,

    -- Slug, e.g. "popescu-ioan-vasile" plus a suffix.
    slug text UNIQUE,
    -- Most frequent function and judet (county).
    primary_function text,
    primary_judet text,
    -- Materialized declaration count for the UI.
    declaration_count integer DEFAULT 0,

    created_at timestamptz DEFAULT now()
);
|
||
|
||
-- Equality lookups on the normalized name.
CREATE INDEX IF NOT EXISTS idx_officials_norm_name ON ani.officials (normalized_name);

-- Trigram GIN index on the same column (operator class comes from pg_trgm).
CREATE INDEX IF NOT EXISTS idx_officials_norm_name_trgm ON ani.officials
    USING gin (normalized_name gin_trgm_ops);
|
||
|
||
|
||
-- ── ani.declaratii ─────────────────────────────────────────────────────────
-- One row per PDF declaration. A row is written in stages:
--   * the listing scraper fills the metadata,
--   * the PDF downloader fills pdf_path + pdf_sha256,
--   * the parser fills parse_status.
CREATE TABLE IF NOT EXISTS ani.declaratii (
    id bigserial PRIMARY KEY,

    -- Link to the resolved official; cleared (not cascaded) if that row is deleted.
    official_id bigint
        REFERENCES ani.officials (id) ON DELETE SET NULL,

    -- Raw fields exactly as shown in the portal listing (pre-resolution).
    raw_official_name text NOT NULL,
    raw_institution text,
    raw_function text,
    raw_localitate text,
    raw_judet text,

    -- Declaration details.
    year smallint NOT NULL,
    declaration_type text NOT NULL
        CHECK (declaration_type IN ('avere', 'interese', 'avere+interese')),
    submission_kind text
        CHECK (
            submission_kind IS NULL
            OR submission_kind IN (
                'anuala', 'numire-functie', 'incetare-functie',
                'rectificativa', 'periodica', 'altele'
            )
        ),
    data_completare date,

    -- Source tracking: which portal the row came from and its ID there.
    source_portal text NOT NULL
        CHECK (source_portal IN ('old', 'new', 'depozitar')),
    source_url text NOT NULL,
    -- uniqueIdentifier (old portal) / _id (new portal).
    source_id text,

    -- PDF storage.
    -- pdf_path is relative to /opt/vreaudigital-data/ani.
    pdf_path text,
    pdf_sha256 char(64),
    pdf_size_bytes integer,
    fetched_at timestamptz,

    -- Parser state.
    parsed_at timestamptz,
    parse_status text DEFAULT 'pending'
        CHECK (parse_status IN (
            'pending', 'ok', 'ocr_required', 'parse_failed',
            'template_unknown', 'download_failed'
        )),
    -- One of: '2008-2010' | '2011-2016' | '2017+' | 'edai'.
    parse_template text,
    parse_error text,

    inserted_at timestamptz DEFAULT now()
);
|
||
|
||
-- Primary dedup key: at most one declaration per (portal, source_id).
-- Rows without a source_id are exempt.
CREATE UNIQUE INDEX IF NOT EXISTS idx_declaratii_source ON ani.declaratii
    (source_portal, source_id)
    WHERE source_id IS NOT NULL;

-- Content-hash dedup: catches the same PDF re-uploaded under different IDs.
-- Being UNIQUE, it rejects a second row carrying an already-stored hash.
CREATE UNIQUE INDEX IF NOT EXISTS idx_declaratii_sha ON ani.declaratii
    (pdf_sha256)
    WHERE pdf_sha256 IS NOT NULL;

-- Declarations of one official, newest year first (resolved rows only).
CREATE INDEX IF NOT EXISTS idx_declaratii_official ON ani.declaratii
    (official_id, year DESC)
    WHERE official_id IS NOT NULL;

-- Browsing by year (newest first) and declaration type.
CREATE INDEX IF NOT EXISTS idx_declaratii_year ON ani.declaratii
    (year DESC, declaration_type);

-- Small partial index covering only rows still waiting on parsing or OCR.
CREATE INDEX IF NOT EXISTS idx_declaratii_pending ON ani.declaratii
    (parse_status)
    WHERE parse_status IN ('pending','ocr_required');

-- Trigram GIN indexes on the raw name / institution text.
CREATE INDEX IF NOT EXISTS idx_declaratii_raw_name_trgm ON ani.declaratii
    USING gin (raw_official_name gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_declaratii_raw_inst_trgm ON ani.declaratii
    USING gin (raw_institution gin_trgm_ops);
|
||
|
||
|
||
-- ── ani.bunuri ─────────────────────────────────────────────────────────────
-- Assets from Sections I (imobile / real estate) and II (mobile / movable
-- goods). The original row text is always kept in raw_row_text for audit and
-- debugging.
CREATE TABLE IF NOT EXISTS ani.bunuri (
    id bigserial PRIMARY KEY,
    -- Owning declaration; assets are removed together with it.
    declaration_id bigint NOT NULL
        REFERENCES ani.declaratii (id) ON DELETE CASCADE,

    category text NOT NULL
        CHECK (category IN (
            'imobil-teren', 'imobil-cladire', 'mobil-vehicul',
            'mobil-bijuterii', 'mobil-tablouri-arta', 'mobil-altele'
        )),
    -- e.g. "agricol" / "intravilan" / "apartament".
    subcategory text,

    localitate text,
    judet text,
    tara text DEFAULT 'România',

    year_acquired smallint,
    -- e.g. "cumparare" / "mostenire" / "donatie".
    mode_acquired text,
    area_sqm numeric,
    -- Stored as a fraction: 1.0 = full ownership.
    share_pct numeric,
    co_owner text,

    value_lei numeric,
    value_currency text DEFAULT 'RON',

    -- Audit copy of the source row.
    raw_row_text text
);
|
||
|
||
-- Fetch all assets of one declaration.
CREATE INDEX IF NOT EXISTS idx_bunuri_decl
    ON ani.bunuri (declaration_id);

-- Filter by county; rows without a judet are left out of the index.
CREATE INDEX IF NOT EXISTS idx_bunuri_judet
    ON ani.bunuri (judet)
    WHERE judet IS NOT NULL;

-- Filter by asset category.
CREATE INDEX IF NOT EXISTS idx_bunuri_category
    ON ani.bunuri (category);
|
||
|
||
|
||
-- ── ani.shareholdings ──────────────────────────────────────────────────────
-- Section IX (firms owned / associated with). THE flagship table: joins to
-- firms.entities via firm_cui (resolved in Stage 4) and, through that CUI, to
-- seap.announcements for "politician-with-firm-supplier-to-state" recipes.
CREATE TABLE IF NOT EXISTS ani.shareholdings (
    id bigserial PRIMARY KEY,
    -- Owning declaration; shareholdings are removed together with it.
    declaration_id bigint NOT NULL
        REFERENCES ani.declaratii (id) ON DELETE CASCADE,

    -- Firm name exactly as it appears in the PDF.
    firm_name_raw text NOT NULL,
    -- Resolved later; NULL until a match is recorded.
    firm_cui text,
    -- pg_trgm similarity of the match.
    firm_match_score real,
    firm_match_method text
        CHECK (
            firm_match_method IS NULL
            OR firm_match_method IN ('exact_name', 'trgm', 'manual', 'unmatched')
        ),
    matched_at timestamptz,

    -- e.g. "actionar" / "asociat" / "administrator" / "membru CA".
    role text,
    share_pct numeric,
    value_lei numeric,

    -- Every other enumerated value in this schema is lowercase, so 'ong' is
    -- the canonical spelling. The original mixed-case 'oNG' is still accepted
    -- so that any writer using the old spelling keeps working; new writers
    -- should emit 'ong'.
    category text
        CHECK (
            category IS NULL
            OR category IN (
                'societate', 'asociatie', 'fundatie', 'cooperativa',
                'ong', 'oNG', 'altele'
            )
        ),

    -- Audit copy of the source row.
    raw_row_text text
);
|
||
|
||
-- Fetch all shareholdings of one declaration.
CREATE INDEX IF NOT EXISTS idx_share_decl ON ani.shareholdings
    (declaration_id);

-- Join key towards firm data; only rows with a resolved CUI are indexed.
CREATE INDEX IF NOT EXISTS idx_share_cui ON ani.shareholdings
    (firm_cui)
    WHERE firm_cui IS NOT NULL;

-- Trigram GIN index on the raw firm name.
CREATE INDEX IF NOT EXISTS idx_share_name_trgm ON ani.shareholdings
    USING gin (firm_name_raw gin_trgm_ops);

-- Partial index over rows with no match yet (never attempted, or 'unmatched').
CREATE INDEX IF NOT EXISTS idx_share_unmatched ON ani.shareholdings
    (firm_match_method)
    WHERE firm_match_method IS NULL OR firm_match_method = 'unmatched';
|
||
|
||
|
||
-- ── ani.functii ────────────────────────────────────────────────────────────
-- Section VIII: public and private functions (positions held).
-- Join targets:
--   * seap.cui_authority when is_public is set and institution_cui matches
--     an authority;
--   * firms.entities when is_public = false.
CREATE TABLE IF NOT EXISTS ani.functii (
    id bigserial PRIMARY KEY,
    -- Owning declaration; positions are removed together with it.
    declaration_id bigint NOT NULL
        REFERENCES ani.declaratii (id) ON DELETE CASCADE,

    is_public boolean,
    function_name text NOT NULL,
    institution_name text NOT NULL,
    -- Resolved later.
    institution_cui text,

    start_year smallint,
    -- NULL while the position is still active.
    end_year smallint,
    -- Annual income from this function.
    salary_lei numeric,

    raw_row_text text
);
|
||
|
||
-- Fetch all positions of one declaration.
CREATE INDEX IF NOT EXISTS idx_functii_decl ON ani.functii
    (declaration_id);

-- Join key on the institution CUI; unresolved rows are not indexed.
CREATE INDEX IF NOT EXISTS idx_functii_inst_cui ON ani.functii
    (institution_cui)
    WHERE institution_cui IS NOT NULL;

-- Trigram GIN index on the institution name.
CREATE INDEX IF NOT EXISTS idx_functii_inst_name_trgm ON ani.functii
    USING gin (institution_name gin_trgm_ops);
|
||
|
||
|
||
-- ── ani.donatii ────────────────────────────────────────────────────────────
-- Section V: donations received.
CREATE TABLE IF NOT EXISTS ani.donatii (
    id bigserial PRIMARY KEY,
    -- Owning declaration; donations are removed together with it.
    declaration_id bigint NOT NULL
        REFERENCES ani.declaratii (id) ON DELETE CASCADE,

    donor_name text,
    donation_type text
        CHECK (
            donation_type IS NULL
            OR donation_type IN ('bani', 'imobil', 'mobil', 'servicii', 'altele')
        ),

    value_lei numeric,
    currency text DEFAULT 'RON',
    year_received smallint,

    raw_row_text text
);
|
||
|
||
-- Fetch all donations of one declaration.
CREATE INDEX IF NOT EXISTS idx_donatii_decl
    ON ani.donatii (declaration_id);
|
||
|
||
|
||
-- ── Comments ──────────────────────────────────────────────────────────────
-- Catalog descriptions, stored in the database alongside the objects.
COMMENT ON SCHEMA ani
    IS 'ANI declarații de avere și interese. Sources: declaratii.integritate.eu + old-declaratii.integritate.eu. Public by Law 176/2010.';

COMMENT ON TABLE ani.declaratii
    IS 'One row per PDF declaration. official_id resolved in Stage 4 dedup.';

COMMENT ON TABLE ani.shareholdings
    IS 'Section IX firme deținute. THE flagship cross-reference: firm_cui joins to firms.entities → seap.announcements.';

COMMENT ON COLUMN ani.declaratii.pdf_path
    IS 'Relative path under /opt/vreaudigital-data/ani/. Full path: /opt/vreaudigital-data/ani/$pdf_path';
|