#!/usr/bin/env python3 """ Import ALL SEAP announcement types for 2026 into seap.announcements. Uses data.gov.ro XLSX files for T1, resolves CUI→SIRUTA via cui_location. """ import os, sys, csv from datetime import datetime from pathlib import Path import openpyxl import psycopg2 from psycopg2.extras import execute_values DB_URL = os.environ.get('DATABASE_URL', 'postgresql://architools_user:stictMyFon34!_gonY@10.10.10.166:5432/architools_db') DATA_DIR = Path(__file__).parent / 'data' # SEAP URL templates SEAP_URLS = { 'da': 'https://e-licitatie.ro/pub/direct-acquisition/view/{ref}', 'notificare': 'https://e-licitatie.ro/pub/da-award-notice/view/{ref}', 'initiere': 'https://e-licitatie.ro/pub/notices/ca-notices/view/{ref}', 'contract': 'https://e-licitatie.ro/pub/notices/ca-notices/view/{ref}', 'atribuire_fara': 'https://e-licitatie.ro/pub/notices/ca-notices/view/{ref}', 'modificare': 'https://e-licitatie.ro/pub/notices/ca-notices/view/{ref}', } def seap_url(ann_type, ref_number): """Build SEAP URL from announcement type and reference number.""" # Extract numeric ID from ref: DA37257925 → 37257925 num = ''.join(c for c in str(ref_number) if c.isdigit()) tmpl = SEAP_URLS.get(ann_type, '') return tmpl.format(ref=num) if tmpl and num else None def read_xlsx(fpath): """Yield (headers, row) from XLSX.""" wb = openpyxl.load_workbook(fpath, read_only=True, data_only=True) ws = wb.active rows = ws.iter_rows(values_only=True) headers = [str(h).strip() if h else '' for h in next(rows)] for row in rows: yield headers, row wb.close() def col(headers, *names): """Find column index.""" h_map = {h.strip().upper(): i for i, h in enumerate(headers)} for n in names: if n.upper() in h_map: return h_map[n.upper()] return None def s(row, idx): """Safe string from row.""" if idx is None or idx >= len(row) or row[idx] is None: return None return str(row[idx]).strip() or None def n(row, idx): """Safe numeric from row.""" if idx is None or idx >= len(row) or row[idx] is None: return None try: return float(str(row[idx]).replace(',', '.').replace(' ', '')) except: return None def d(row, idx): """Safe date from row.""" if idx is None or idx >= len(row) or row[idx] is None: return None v = row[idx] if isinstance(v, datetime): return v try: return datetime.fromisoformat(str(v)) except: return None def clean_cui(val): if val is None: return None return str(val).strip().replace('RO', '').replace('ro', '').strip() or None # ── Parsers per type ── def parse_da(headers, row): return { 'type': 'da', 'ref_number': s(row, col(headers, 'Numar achizitie directa')), 'authority_name': s(row, col(headers, 'Autoritate contractanta')), 'authority_cui': clean_cui(s(row, col(headers, 'CUI autoritate contractanta'))), 'title': s(row, col(headers, 'Denumire achizitie')), 'cpv_code': s(row, col(headers, 'Cod CPV')), 'cpv_name': s(row, col(headers, 'Denumire CPV')), 'contract_type': s(row, col(headers, 'Tip contract')), 'publication_date': d(row, col(headers, 'Data publicare')), 'finalization_date': d(row, col(headers, 'Data finalizare')), 'awarded_value': n(row, col(headers, 'Valoare achizitie (RON)')), 'supplier_name': s(row, col(headers, 'Ofertant castigator')), 'supplier_cui': clean_cui(s(row, col(headers, 'CUI ofertant castigator'))), 'eu_funded': s(row, col(headers, 'Finantare prin fonduri comunitare?')), 'eu_program': s(row, col(headers, 'Denumire program')), } def parse_notificare(headers, row): return { 'type': 'notificare', 'ref_number': s(row, col(headers, 'Numar notificare')), 'authority_name': s(row, col(headers, 'Autoritate contractanta')), 'authority_cui': clean_cui(s(row, col(headers, 'CUI autoritate contractanta'))), 'title': s(row, col(headers, 'Obiectul achizitiei')), 'cpv_code': s(row, col(headers, 'Cod CPV')), 'cpv_name': s(row, col(headers, 'Denumire CPV')), 'contract_type': s(row, col(headers, 'Tip contract')), 'publication_date': d(row, col(headers, 'Data publicare')), 'finalization_date': d(row, col(headers, 'Data finalizare')), 'awarded_value': n(row, col(headers, 'Valoare achizitie (RON)')), 'supplier_name': s(row, col(headers, 'Ofertant castigator')), 'supplier_cui': clean_cui(s(row, col(headers, 'CUI ofertant castigator'))), 'eu_funded': s(row, col(headers, 'Finantare prin fonduri comunitare?')), 'eu_program': s(row, col(headers, 'Tipul de proiect/ program')), } def parse_initiere(headers, row): return { 'type': 'initiere', 'ref_number': s(row, col(headers, 'Numar anunt initiere')), 'authority_name': s(row, col(headers, 'Autoritate contractanta')), 'authority_cui': clean_cui(s(row, col(headers, 'CUI autoritate contractanta'))), 'title': s(row, col(headers, 'Denumire procedura')), 'cpv_code': s(row, col(headers, 'Cod CPV')), 'cpv_name': s(row, col(headers, 'Denumire CPV')), 'contract_type': s(row, col(headers, 'Tip contract')), 'publication_date': d(row, col(headers, 'Data publicare')), 'estimated_value': n(row, col(headers, 'Valoare estimata procedura (RON)')), 'currency': s(row, col(headers, 'Moneda')) or 'RON', 'procedure_type': s(row, col(headers, 'Tip procedura')), 'procedure_state': s(row, col(headers, 'Stare procedura')), 'award_type': s(row, col(headers, 'Modalitate de atribuire')), 'has_lots': s(row, col(headers, 'Contractul este impartit in loturi?')), 'joue': s(row, col(headers, 'Anunt cu transmitere la JOUE?')), } def parse_contract(headers, row): # Find the second 'Data publicare' column (index 14, not 5) pub_date_indices = [i for i, h in enumerate(headers) if h.strip().upper() == 'DATA PUBLICARE'] pub_date_idx = pub_date_indices[1] if len(pub_date_indices) > 1 else pub_date_indices[0] if pub_date_indices else None return { 'type': 'contract', 'ref_number': s(row, col(headers, 'Numar anunt atribuire')) or s(row, col(headers, 'Numar contract')), 'authority_name': s(row, col(headers, 'Autoritate contractanta')), 'authority_cui': clean_cui(s(row, col(headers, 'CUI autoritate contractanta'))), 'title': s(row, col(headers, 'Denumire CPV')), 'cpv_code': s(row, col(headers, 'Cod CPV')), 'cpv_name': s(row, col(headers, 'Denumire CPV')), 'contract_type': s(row, col(headers, 'Tip contract')), 'publication_date': d(row, pub_date_idx) if pub_date_idx else None, 'contract_date': d(row, col(headers, 'Data contract')), 'awarded_value': n(row, col(headers, 'Valoare contract (RON)')), 'supplier_name': s(row, col(headers, 'Ofertant castigator')), 'supplier_cui': clean_cui(s(row, col(headers, 'CUI ofertant castigator'))), 'procedure_type': s(row, col(headers, 'Tip procedura')), 'award_type': s(row, col(headers, 'Tip incheiere contract')), 'legislation': s(row, col(headers, 'Tip legislatie')), 'criterion': s(row, col(headers, 'Tip criterii de atribuire')), 'lot_number': n(row, col(headers, 'Numar lot')), } def parse_atribuire_fara(headers, row): return { 'type': 'atribuire_fara', 'ref_number': s(row, col(headers, 'Numar anunt atribuire')), 'authority_name': s(row, col(headers, 'Autoritate contractanta')), 'authority_cui': clean_cui(s(row, col(headers, 'CUI autoritate contractanta'))), 'title': s(row, col(headers, 'Denumire contract')), 'cpv_code': s(row, col(headers, 'Cod CPV')), 'cpv_name': s(row, col(headers, 'Denumire CPV')), 'contract_type': s(row, col(headers, 'Tip contract')), 'publication_date': d(row, col(headers, 'Data publicare')), 'contract_date': d(row, col(headers, 'Data contract')), 'awarded_value': n(row, col(headers, 'Valoare atribuita (RON)')), 'supplier_name': s(row, col(headers, 'Ofertant castigator')), 'supplier_cui': clean_cui(s(row, col(headers, 'CUI ofertant castigator'))), 'procedure_type': s(row, col(headers, 'Tip procedura')), 'legislation': s(row, col(headers, 'Tip legislatie')), 'criterion': s(row, col(headers, 'Criteriu de atribuire')), 'award_type': s(row, col(headers, 'Incheiat prin')), } def parse_modificare(headers, row): return { 'type': 'modificare', 'ref_number': s(row, col(headers, 'Numar anunt atribuire')), 'authority_name': s(row, col(headers, 'Autoritate contractanta')), 'authority_cui': clean_cui(s(row, col(headers, 'CUI autoritate contractanta'))), 'publication_date': d(row, col(headers, 'Data publicare')), 'contract_date': d(row, col(headers, 'Data contract')), 'value_before': n(row, col(headers, 'Valoarea totala actualizata a contractului inainte de modificari')), 'value_after': n(row, col(headers, 'Valoarea totala a contractului dupa modificari')), 'modification_desc': s(row, col(headers, 'Descrierea modificarilor')), } PARSERS = { 'da': parse_da, 'notificare': parse_notificare, 'initiere': parse_initiere, 'contract': parse_contract, 'atribuire_fara': parse_atribuire_fara, 'modificare': parse_modificare, } # ── Files ── FILES_2026_T1 = { 'da': 'https://data.gov.ro/dataset/e8d22de4-f8ce-4b42-8561-98d2481ddef9/resource/5bcff70e-7541-4e7f-86e2-f21b54807e26/download/raport-achizitii-directe-ti-2026.xlsx', 'notificare': 'https://data.gov.ro/dataset/e8d22de4-f8ce-4b42-8561-98d2481ddef9/resource/728c1bb4-c23c-4f5f-9a7d-8dba7d4b8c4d/download/raport-notificari-de-atribuire-la-cumpararea-directa-ti-2026.xlsx', 'initiere': 'https://data.gov.ro/dataset/e8d22de4-f8ce-4b42-8561-98d2481ddef9/resource/5720192a-6c1a-4f40-bccc-9c12bc6a2a8f/download/raport-anunturi-de-initiere-publicate-ti-2026.xlsx', 'contract': 'https://data.gov.ro/dataset/e8d22de4-f8ce-4b42-8561-98d2481ddef9/resource/f78b2b07-48aa-442e-b7e3-4b39f45a0b5b/download/raport-contracte-ti-2026.xlsx', 'atribuire_fara': 'https://data.gov.ro/dataset/e8d22de4-f8ce-4b42-8561-98d2481ddef9/resource/6d72d696-b4ca-40a1-9fbb-5b25f0f40f63/download/raport-anunturi-de-atribuire-la-proceduri-fara-anunt-de-initiere-ti-2026.xlsx', 'modificare': 'https://data.gov.ro/dataset/e8d22de4-f8ce-4b42-8561-98d2481ddef9/resource/6df70e9f-a9cb-443f-b0ee-4d424d51d6b2/download/raport-date-din-modificare-contract-ti-2026.xlsx', } # Use 2025 T1 as fallback if 2026 download fails FILES_2025_T1 = { 'da': 'data/datagov_raport-achizitii_directe_t1_2025.xlsx', 'notificare': 'data/2025_t1_notificari.xlsx', 'initiere': 'data/datagov_raport_anunturi-de-initiere-publicate_t1_2025.xlsx', 'contract': 'data/2025_t1_contracte.xlsx', 'atribuire_fara': 'data/2025_t1_atribuire_fara.xlsx', 'modificare': 'data/2025_t1_modificare.xlsx', } def download(url, label): import urllib.request fname = f"2026_t1_{label}.xlsx" fpath = DATA_DIR / fname if fpath.exists() and fpath.stat().st_size > 1000: print(f" [cached] {fname} ({fpath.stat().st_size // 1024}KB)") return fpath print(f" [download] {label}...") try: urllib.request.urlretrieve(url, fpath) print(f" [done] {fpath.stat().st_size // 1024}KB") return fpath except Exception as e: print(f" [FAIL] {e}") return None def import_file(conn, ann_type, fpath, parser_fn): """Import one XLSX file into seap.announcements.""" cur = conn.cursor() total = 0 skipped = 0 batch = [] for headers, row in read_xlsx(fpath): try: rec = parser_fn(headers, row) except Exception: skipped += 1 continue if not rec or not rec.get('ref_number'): skipped += 1 continue rec['seap_url'] = seap_url(ann_type, rec['ref_number']) batch.append(rec) if len(batch) >= 3000: inserted = _insert_batch(cur, batch) total += inserted skipped += len(batch) - inserted batch = [] conn.commit() print(f" {ann_type}: {total} inserted, {skipped} skipped...") if batch: inserted = _insert_batch(cur, batch) total += inserted skipped += len(batch) - inserted conn.commit() return total, skipped def _insert_batch(cur, batch): cols = ['type', 'ref_number', 'authority_name', 'authority_cui', 'title', 'cpv_code', 'cpv_name', 'contract_type', 'publication_date', 'finalization_date', 'contract_date', 'estimated_value', 'awarded_value', 'currency', 'supplier_name', 'supplier_cui', 'procedure_type', 'procedure_state', 'award_type', 'legislation', 'criterion', 'eu_funded', 'eu_program', 'lot_number', 'has_lots', 'joue', 'value_before', 'value_after', 'modification_desc', 'seap_url'] values = [] for rec in batch: values.append(tuple(rec.get(c) for c in cols)) placeholders = ','.join(['%s'] * len(cols)) col_names = ','.join(cols) try: execute_values(cur, f""" INSERT INTO seap.announcements ({col_names}) VALUES %s ON CONFLICT (type, ref_number) DO NOTHING """, values, template=f"({placeholders})") return cur.rowcount except Exception as e: cur.connection.rollback() print(f" [error] {e}") return 0 def resolve_siruta(conn): """Update authority_siruta from cui_location.""" cur = conn.cursor() cur.execute(""" UPDATE seap.announcements a SET authority_siruta = cl.siruta FROM seap.cui_location cl WHERE a.authority_cui = cl.cui AND cl.siruta IS NOT NULL AND a.authority_siruta IS NULL """) updated = cur.rowcount conn.commit() print(f" SIRUTA resolved: {updated} announcements") # Also resolve supplier cur.execute(""" UPDATE seap.announcements a SET supplier_siruta = cl.siruta FROM seap.cui_location cl WHERE a.supplier_cui = cl.cui AND cl.siruta IS NOT NULL AND a.supplier_siruta IS NULL """) sup_updated = cur.rowcount conn.commit() print(f" Supplier SIRUTA: {sup_updated}") return updated def rebuild_materialized_view(conn): """Rebuild MV using announcements table.""" cur = conn.cursor() cur.execute("DROP MATERIALIZED VIEW IF EXISTS seap.uat_procurement_stats") cur.execute(""" CREATE MATERIALIZED VIEW seap.uat_procurement_stats AS SELECT u.siruta, u.name AS uat_name, u.county, COALESCE(s.da_count, 0)::bigint AS da_count, COALESCE(s.da_value, 0)::numeric AS da_total_value, COALESCE(s.contract_count, 0)::bigint AS notice_count, COALESCE(s.contract_value, 0)::numeric AS notice_total_value, COALESCE(s.total_count, 0)::bigint AS total_contracts, COALESCE(s.total_value, 0)::numeric AS total_value FROM public."GisUat" u LEFT JOIN ( SELECT authority_siruta AS siruta, COUNT(*) FILTER (WHERE type = 'da') AS da_count, SUM(awarded_value) FILTER (WHERE type = 'da') AS da_value, COUNT(*) FILTER (WHERE type IN ('contract', 'atribuire_fara')) AS contract_count, SUM(awarded_value) FILTER (WHERE type IN ('contract', 'atribuire_fara')) AS contract_value, COUNT(*) AS total_count, SUM(COALESCE(awarded_value, estimated_value, 0)) AS total_value FROM seap.announcements WHERE authority_siruta IS NOT NULL GROUP BY authority_siruta ) s ON s.siruta = u.siruta """) cur.execute("CREATE UNIQUE INDEX idx_ups_siruta ON seap.uat_procurement_stats(siruta)") conn.commit() print(" Materialized view rebuilt") def main(): year = sys.argv[1] if len(sys.argv) > 1 else '2026' conn = psycopg2.connect(DB_URL) files = FILES_2026_T1 if year == '2026' else {} local_files = FILES_2025_T1 if year == '2025' else {} print(f"\n=== Import ALL types — {year} T1 — {datetime.now().isoformat()} ===\n") grand_total = 0 for ann_type, parser_fn in PARSERS.items(): print(f"\n── {ann_type.upper()} ──") # Try download 2026, fallback to local 2025 fpath = None if ann_type in files: fpath = download(files[ann_type], ann_type) if not fpath and ann_type in local_files: local = Path(__file__).parent / local_files[ann_type] if local.exists(): fpath = local print(f" [fallback] Using 2025: {local.name}") if not fpath: print(f" [SKIP] No file available") continue inserted, skipped = import_file(conn, ann_type, fpath, parser_fn) grand_total += inserted print(f" Done: {inserted} inserted, {skipped} skipped") print(f"\n── RESOLVE SIRUTA ──") resolve_siruta(conn) print(f"\n── REBUILD MATERIALIZED VIEW ──") rebuild_materialized_view(conn) # Stats cur = conn.cursor() cur.execute("SELECT type, COUNT(*), COALESCE(SUM(awarded_value), 0)::numeric FROM seap.announcements GROUP BY type ORDER BY type") print(f"\n{'='*60}") print(f" {'Type':<20} {'Count':>10} {'Value (RON)':>15}") print(f" {'-'*20} {'-'*10} {'-'*15}") for row in cur.fetchall(): print(f" {row[0]:<20} {row[1]:>10,} {row[2]:>15,.0f}") cur.execute("SELECT COUNT(*) FROM seap.uat_procurement_stats WHERE total_contracts > 0") uats = cur.fetchone()[0] print(f"\n UATs with data: {uats}") print(f" Grand total inserted: {grand_total}") conn.close() if __name__ == '__main__': main()