#!/usr/bin/env python3 """ Import Romanian procurement data from TED (Tenders Electronic Daily) API. Free, no auth, detailed data including criteria, deadlines, documents, winners. Covers above-threshold tenders (~12K+ for 2026). """ import json import os import sys import time from datetime import datetime import psycopg2 from psycopg2.extras import Json DB_URL = os.environ.get('DATABASE_URL', 'postgresql://architools_user:stictMyFon34!_gonY@10.10.10.166:5432/architools_db') TED_API = 'https://api.ted.europa.eu/v3/notices/search' FIELDS = [ 'notice-identifier', 'publication-date', 'description-lot', 'description-proc', 'deadline-receipt-tender-date-lot', 'deadline-receipt-tender-time-lot', 'organisation-name-buyer', 'organisation-city-buyer', 'estimated-value-lot', 'estimated-value-cur-lot', 'tender-value', 'tender-value-cur', 'classification-cpv', 'contract-nature', 'winner-name', 'winner-city', 'winner-identifier', 'document-url-lot', 'award-criterion-name-lot', 'award-criterion-number-weight-lot', 'guarantee-required-description-lot', 'duration-period-value-lot', 'place-performance-street-lot', 'subcontracting-description', 'winner-decision-date', ] import urllib.request def ted_search(query, page=1, limit=100): """Search TED API.""" body = json.dumps({ 'query': query, 'limit': limit, 'page': page, 'fields': FIELDS, }).encode() req = urllib.request.Request(TED_API, data=body, headers={ 'Content-Type': 'application/json', }) with urllib.request.urlopen(req, timeout=30) as resp: return json.loads(resp.read()) def extract_text(val): """Extract Romanian text from TED multilingual field.""" if val is None: return None if isinstance(val, dict): return val.get('ron', [val.get('eng', [None])])[0] if val else None if isinstance(val, list): return val[0] if val else None return str(val) def extract_list(val): """Extract list of Romanian texts.""" if val is None: return None if isinstance(val, dict): items = val.get('ron', val.get('eng', [])) return items if isinstance(items, list) else [items] if isinstance(val, list): return val return [str(val)] def parse_notice(notice): """Parse TED notice into our announcement format.""" pub_number = notice.get('publication-number', '') desc = extract_text(notice.get('description-lot')) or extract_text(notice.get('description-proc')) buyer_name = extract_text(notice.get('organisation-name-buyer')) buyer_city = extract_text(notice.get('organisation-city-buyer')) # CPV cpv_list = notice.get('classification-cpv', []) cpv_code = cpv_list[0] if cpv_list else None # Values est_values = notice.get('estimated-value-lot', []) est_value = float(est_values[0]) if est_values else None tender_values = notice.get('tender-value', []) tender_value = float(tender_values[0]) if tender_values else None # Deadline deadlines = notice.get('deadline-receipt-tender-date-lot', []) deadline = deadlines[0] if deadlines else None # Winner — can be list, dict, or string winner_name = extract_text(notice.get('winner-name')) winner_cui = extract_text(notice.get('winner-identifier')) winner_city = extract_text(notice.get('winner-city')) # Documents doc_urls = notice.get('document-url-lot', []) documents = [{'url': u} for u in doc_urls] if doc_urls else None # Criteria crit_names = extract_list(notice.get('award-criterion-name-lot')) crit_weights = notice.get('award-criterion-number-weight-lot', []) criteria = None if crit_names: criteria = [] for i, name in enumerate(crit_names): weight = crit_weights[i] if i < len(crit_weights) else None criteria.append({'name': name, 'weight': weight}) # Duration durations = notice.get('duration-period-value-lot', []) duration = durations[0] if durations else None # Contract nature natures = notice.get('contract-nature', []) contract_type = natures[0] if natures else None type_map = {'services': 'Servicii', 'supplies': 'Furnizare', 'works': 'Lucrări'} contract_type = type_map.get(contract_type, contract_type) # Guarantee guarantee = extract_text(notice.get('guarantee-required-description-lot')) # Links ted_url = None links = notice.get('links', {}) html_links = links.get('html', {}) ted_url = html_links.get('RON') or html_links.get('ENG') xml_url = links.get('xml', {}).get('MUL') return { 'type': 'ted_notice', 'ref_number': f'TED-{pub_number}', 'authority_name': buyer_name, 'authority_cui': None, # TED doesn't have CUI directly 'title': (desc or '')[:500] if desc else None, 'description': desc, 'cpv_code': cpv_code, 'contract_type': contract_type, 'publication_date': notice.get('publication-date'), 'submission_deadline': deadline, 'estimated_value': est_value, 'awarded_value': tender_value, 'currency': 'RON', 'supplier_name': winner_name, 'supplier_cui': winner_cui, 'documents': json.dumps(documents) if documents else None, 'award_criteria': json.dumps(criteria) if criteria else None, 'lots': None, 'seap_url': ted_url, 'details': json.dumps({ 'ted_publication_number': pub_number, 'xml_url': xml_url, 'duration_days': duration, 'guarantee': guarantee, 'buyer_city': buyer_city, 'winner_city': winner_city, 'subcontracting': extract_text(notice.get('subcontracting-description')), }), 'source': 'ted', } def main(): year = sys.argv[1] if len(sys.argv) > 1 else '2026' conn = psycopg2.connect(DB_URL) cur = conn.cursor() query = f'CY=ROU AND PD>{year}0101' print(f'\n=== TED Import — Romania {year} — {datetime.now().isoformat()} ===') # Get total count first result = ted_search(query, page=1, limit=1) total = result.get('totalNoticeCount', 0) print(f'Total notices: {total}') page = 1 limit = 100 inserted = 0 skipped = 0 while True: print(f' Page {page}...') result = ted_search(query, page=page, limit=limit) notices = result.get('notices', []) if not notices: break for notice in notices: parsed = parse_notice(notice) if not parsed['ref_number']: skipped += 1 continue try: cur.execute(""" INSERT INTO seap.announcements (type, ref_number, authority_name, authority_cui, title, description, cpv_code, contract_type, publication_date, submission_deadline, estimated_value, awarded_value, currency, supplier_name, supplier_cui, documents, award_criteria, lots, seap_url, details, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s::timestamptz, %s, %s, %s, %s, %s, %s::jsonb, %s::jsonb, %s::jsonb, %s, %s::jsonb, %s) ON CONFLICT (type, ref_number) DO UPDATE SET description = EXCLUDED.description, awarded_value = COALESCE(EXCLUDED.awarded_value, seap.announcements.awarded_value), supplier_name = COALESCE(EXCLUDED.supplier_name, seap.announcements.supplier_name), supplier_cui = COALESCE(EXCLUDED.supplier_cui, seap.announcements.supplier_cui), documents = COALESCE(EXCLUDED.documents, seap.announcements.documents), award_criteria = COALESCE(EXCLUDED.award_criteria, seap.announcements.award_criteria), details = EXCLUDED.details, enriched_at = now() """, ( parsed['type'], parsed['ref_number'], parsed['authority_name'], parsed['authority_cui'], parsed['title'], parsed['description'], parsed['cpv_code'], parsed['contract_type'], parsed['publication_date'], parsed['submission_deadline'], parsed['estimated_value'], parsed['awarded_value'], parsed['currency'], parsed['supplier_name'], parsed['supplier_cui'], parsed['documents'], parsed['award_criteria'], parsed['lots'], parsed['seap_url'], parsed['details'], parsed['source'], )) inserted += 1 except Exception as e: conn.rollback() skipped += 1 if inserted < 5: print(f' Error: {e}') continue conn.commit() print(f' Inserted: {inserted}, Skipped: {skipped}') if len(notices) < limit: break page += 1 time.sleep(0.5) # Be polite # Try to match buyer names to CUI via cui_location print('\nMatching TED buyers to CUI...') cur.execute(""" UPDATE seap.announcements a SET authority_cui = cl.cui, authority_siruta = cl.siruta FROM seap.cui_location cl WHERE a.type = 'ted_notice' AND a.authority_cui IS NULL AND a.authority_name IS NOT NULL AND seap.normalize_locality(cl.name) = seap.normalize_locality(a.authority_name) """) name_matched = cur.rowcount print(f' Matched by name: {name_matched}') # Match supplier CUI cur.execute(""" UPDATE seap.announcements a SET supplier_siruta = cl.siruta FROM seap.cui_location cl WHERE a.type = 'ted_notice' AND a.supplier_cui = cl.cui AND cl.siruta IS NOT NULL AND a.supplier_siruta IS NULL """) sup_matched = cur.rowcount print(f' Supplier SIRUTA: {sup_matched}') conn.commit() print(f'\n=== Done: {inserted} imported, {skipped} skipped ===') conn.close() if __name__ == '__main__': main()