-- 030_ani_schema.sql
-- ANI asset and interest declarations (declarații de avere și interese) —
-- flagship transparency feature.
--
-- Source: declaratii.integritate.eu (e-DAI 2022→), old-declaratii.integritate.eu
-- (archive 2008-2022). Public by Law 176/2010, GDPR-safe (no CNP stored).
--
-- ~1.3M PDF declarations of Romanian public officials. Cross-references
-- politicians × firms-they-own × procurement-contracts (firms.entities, seap.*).
--
-- See ANI-PLAN.md for full architecture, volume estimates, and rollout plan.
-- This file = Stage 0 (schema only, no data).

CREATE SCHEMA IF NOT EXISTS ani;

GRANT USAGE ON SCHEMA ani TO PUBLIC;

-- The *_trgm indexes in this file use the gin_trgm_ops operator class, which
-- is provided by the pg_trgm extension. This is a no-op when the extension is
-- already installed.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- ── ani.officials ──────────────────────────────────────────────────────────
-- One row per distinct public official (dignitary / civil servant). Filled by
-- Stage 4 (entity resolution), not by the listing scraper.
-- ani.declaratii.official_id is nullable until dedup runs.
CREATE TABLE IF NOT EXISTS ani.officials (
    id                bigserial   PRIMARY KEY,
    normalized_name   text        NOT NULL,          -- lower(unaccent(name)) collapsed
    display_name      text        NOT NULL,          -- "Popescu Ioan-Vasile"
    cnp_hash          char(64),                      -- SHA-256 if extractable (rare)
    first_seen_year   smallint,                      -- min(declaration year)
    last_seen_year    smallint,                      -- max(declaration year)
    slug              text        UNIQUE,            -- "popescu-ioan-vasile" + suffix
    primary_function  text,                          -- most-frequent function
    primary_judet     text,                          -- most-frequent judet (county)
    declaration_count integer     DEFAULT 0,         -- materialized count for UI
    created_at        timestamptz DEFAULT now()
);

-- Exact lookup on the normalized name.
CREATE INDEX IF NOT EXISTS idx_officials_norm_name
    ON ani.officials (normalized_name);

-- Trigram index on the normalized name (requires pg_trgm).
CREATE INDEX IF NOT EXISTS idx_officials_norm_name_trgm
    ON ani.officials USING gin (normalized_name gin_trgm_ops);

-- ── ani.declaratii ─────────────────────────────────────────────────────────
-- One row per PDF declaration. Listing scraper fills the metadata; PDF
-- downloader fills pdf_path + pdf_sha256; parser fills parse_status.
CREATE TABLE IF NOT EXISTS ani.declaratii (
    id                bigserial   PRIMARY KEY,
    -- NULL until entity resolution links the row to an official; reset to
    -- NULL if that official row is deleted.
    official_id       bigint      REFERENCES ani.officials(id) ON DELETE SET NULL,

    -- raw fields straight from portal listing (pre-resolution)
    raw_official_name text        NOT NULL,
    raw_institution   text,
    raw_function      text,
    raw_localitate    text,
    raw_judet         text,

    -- declaration details
    year              smallint    NOT NULL,
    declaration_type  text        NOT NULL
        CHECK (declaration_type IN ('avere','interese','avere+interese')),
    submission_kind   text
        CHECK (submission_kind IN ('anuala','numire-functie','incetare-functie',
                                   'rectificativa','periodica','altele')
               OR submission_kind IS NULL),
    data_completare   date,

    -- source tracking (which portal, which ID)
    source_portal     text        NOT NULL
        CHECK (source_portal IN ('old','new','depozitar')),
    source_url        text        NOT NULL,
    source_id         text,                          -- uniqueIdentifier (old) / _id (new)

    -- PDF storage
    pdf_path          text,                          -- relative to /opt/vreaudigital-data/ani
    pdf_sha256        char(64),
    pdf_size_bytes    integer,
    fetched_at        timestamptz,

    -- parser state
    parsed_at         timestamptz,
    parse_status      text        DEFAULT 'pending'
        CHECK (parse_status IN ('pending','ok','ocr_required','parse_failed',
                                'template_unknown','download_failed')),
    parse_template    text,                          -- '2008-2010' | '2011-2016' | '2017+' | 'edai'
    parse_error       text,

    inserted_at       timestamptz DEFAULT now()
);

-- one declaration per (portal, source_id) — primary dedup key
CREATE UNIQUE INDEX IF NOT EXISTS idx_declaratii_source
    ON ani.declaratii (source_portal, source_id)
    WHERE source_id IS NOT NULL;

-- content-hash dedup — same PDF re-uploaded under different IDs
CREATE UNIQUE INDEX IF NOT EXISTS idx_declaratii_sha
    ON ani.declaratii (pdf_sha256)
    WHERE pdf_sha256 IS NOT NULL;

-- Declarations of one official, newest year first; only resolved rows.
CREATE INDEX IF NOT EXISTS idx_declaratii_official
    ON ani.declaratii (official_id, year DESC)
    WHERE official_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_declaratii_year
    ON ani.declaratii (year DESC, declaration_type);

-- Partial index restricted to rows whose parse_status is 'pending' or
-- 'ocr_required'.
CREATE INDEX IF NOT EXISTS idx_declaratii_pending
    ON ani.declaratii (parse_status)
    WHERE parse_status IN ('pending','ocr_required');

-- Trigram indexes on the raw name / institution text (require pg_trgm).
CREATE INDEX IF NOT EXISTS idx_declaratii_raw_name_trgm
    ON ani.declaratii USING gin (raw_official_name gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_declaratii_raw_inst_trgm
    ON ani.declaratii USING gin (raw_institution gin_trgm_ops);

-- ── ani.bunuri ─────────────────────────────────────────────────────────────
-- Assets: Sections I (real estate / imobile) + II (movable goods / mobile).
-- raw_row_text always preserved for audit / debug.
CREATE TABLE IF NOT EXISTS ani.bunuri (
    id              bigserial PRIMARY KEY,
    -- Asset rows are deleted together with their declaration.
    declaration_id  bigint    NOT NULL REFERENCES ani.declaratii(id) ON DELETE CASCADE,
    category        text      NOT NULL
        CHECK (category IN ('imobil-teren','imobil-cladire','mobil-vehicul',
                            'mobil-bijuterii','mobil-tablouri-arta','mobil-altele')),
    subcategory     text,                            -- "agricol"/"intravilan"/"apartament"
    localitate      text,
    judet           text,
    tara            text      DEFAULT 'România',
    year_acquired   smallint,
    mode_acquired   text,                            -- "cumparare"/"mostenire"/"donatie"
    area_sqm        numeric,
    share_pct       numeric,                         -- 1.0 = full ownership
    co_owner        text,
    value_lei       numeric,
    value_currency  text      DEFAULT 'RON',
    raw_row_text    text                             -- audit
);

CREATE INDEX IF NOT EXISTS idx_bunuri_decl
    ON ani.bunuri (declaration_id);

CREATE INDEX IF NOT EXISTS idx_bunuri_judet
    ON ani.bunuri (judet)
    WHERE judet IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_bunuri_category
    ON ani.bunuri (category);

-- ── ani.shareholdings ──────────────────────────────────────────────────────
-- Section IX (firms owned / associated with). THE flagship table — joins to
-- firms.entities via firm_cui (resolved in Stage 4) and to seap.announcements
-- via that CUI for "politician-with-firm-supplier-to-state" recipes.
CREATE TABLE IF NOT EXISTS ani.shareholdings (
    id                bigserial PRIMARY KEY,
    -- Shareholding rows are deleted together with their declaration.
    declaration_id    bigint    NOT NULL REFERENCES ani.declaratii(id) ON DELETE CASCADE,
    firm_name_raw     text      NOT NULL,            -- text from PDF
    firm_cui          text,                          -- resolved later
    firm_match_score  real,                          -- pg_trgm similarity
    firm_match_method text
        CHECK (firm_match_method IN ('exact_name','trgm','manual','unmatched')
               OR firm_match_method IS NULL),
    matched_at        timestamptz,
    role              text,                          -- "actionar"/"asociat"/"administrator"/"membru CA"
    share_pct         numeric,
    value_lei         numeric,
    -- Allowed values are all lowercase. 'ong' replaces the mis-cased literal
    -- 'oNG'. Because of IF NOT EXISTS, a table created before this fix keeps
    -- its old constraint until it is altered separately.
    category          text
        CHECK (category IN ('societate','asociatie','fundatie','cooperativa',
                            'ong','altele')
               OR category IS NULL),
    raw_row_text      text
);

CREATE INDEX IF NOT EXISTS idx_share_decl
    ON ani.shareholdings (declaration_id);

-- Lookup by resolved firm CUI; unresolved rows are left out of the index.
CREATE INDEX IF NOT EXISTS idx_share_cui
    ON ani.shareholdings (firm_cui)
    WHERE firm_cui IS NOT NULL;

-- Trigram index on the raw firm name (requires pg_trgm).
CREATE INDEX IF NOT EXISTS idx_share_name_trgm
    ON ani.shareholdings USING gin (firm_name_raw gin_trgm_ops);

-- Partial index restricted to rows with no match method or 'unmatched'.
CREATE INDEX IF NOT EXISTS idx_share_unmatched
    ON ani.shareholdings (firm_match_method)
    WHERE firm_match_method IS NULL OR firm_match_method = 'unmatched';

-- ── ani.functii ────────────────────────────────────────────────────────────
-- Section VIII — public and private positions held. Joinable to
-- seap.cui_authority (when is_public + institution_cui matches an authority)
-- and firms.entities (when is_public = false).
CREATE TABLE IF NOT EXISTS ani.functii (
    id               bigserial PRIMARY KEY,
    -- Position rows are deleted together with their declaration.
    declaration_id   bigint    NOT NULL REFERENCES ani.declaratii(id) ON DELETE CASCADE,
    is_public        boolean,
    function_name    text      NOT NULL,
    institution_name text      NOT NULL,
    institution_cui  text,                           -- resolved later
    start_year       smallint,
    end_year         smallint,                       -- NULL if active
    salary_lei       numeric,                        -- annual income from this function
    raw_row_text     text
);

CREATE INDEX IF NOT EXISTS idx_functii_decl
    ON ani.functii (declaration_id);

-- Lookup by resolved institution CUI; unresolved rows are left out of the index.
CREATE INDEX IF NOT EXISTS idx_functii_inst_cui
    ON ani.functii (institution_cui)
    WHERE institution_cui IS NOT NULL;

-- Trigram index on the institution name (requires pg_trgm).
CREATE INDEX IF NOT EXISTS idx_functii_inst_name_trgm
    ON ani.functii USING gin (institution_name gin_trgm_ops);

-- ── ani.donatii ────────────────────────────────────────────────────────────
-- Section V (donations received).
CREATE TABLE IF NOT EXISTS ani.donatii (
    id             bigserial PRIMARY KEY,
    -- Donation rows are deleted together with their declaration.
    declaration_id bigint    NOT NULL REFERENCES ani.declaratii(id) ON DELETE CASCADE,
    donor_name     text,
    donation_type  text
        CHECK (donation_type IN ('bani','imobil','mobil','servicii','altele')
               OR donation_type IS NULL),
    value_lei      numeric,
    currency       text      DEFAULT 'RON',
    year_received  smallint,
    raw_row_text   text
);

CREATE INDEX IF NOT EXISTS idx_donatii_decl
    ON ani.donatii (declaration_id);

-- ── Comments ──────────────────────────────────────────────────────────────
-- Catalog descriptions attached to the schema objects.
COMMENT ON SCHEMA ani IS
    'ANI declarații de avere și interese. Sources: declaratii.integritate.eu + old-declaratii.integritate.eu. Public by Law 176/2010.';

COMMENT ON TABLE ani.declaratii IS
    'One row per PDF declaration. official_id resolved in Stage 4 dedup.';

COMMENT ON TABLE ani.shareholdings IS
    'Section IX firme deținute. THE flagship cross-reference: firm_cui joins to firms.entities → seap.announcements.';

COMMENT ON COLUMN ani.declaratii.pdf_path IS
    'Relative path under /opt/vreaudigital-data/ani/. Full path: /opt/vreaudigital-data/ani/$pdf_path';