a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
294 lines
10 KiB
Python
294 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Import Romanian procurement data from TED (Tenders Electronic Daily) API.
|
|
Free, no auth, detailed data including criteria, deadlines, documents, winners.
|
|
Covers above-threshold tenders (~12K+ for 2026).
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from datetime import datetime
|
|
|
|
import psycopg2
|
|
from psycopg2.extras import Json
|
|
|
|
DB_URL = os.environ.get('DATABASE_URL',
|
|
'postgresql://architools_user:stictMyFon34!_gonY@10.10.10.166:5432/architools_db')
|
|
|
|
TED_API = 'https://api.ted.europa.eu/v3/notices/search'
|
|
|
|
FIELDS = [
|
|
'notice-identifier',
|
|
'publication-date',
|
|
'description-lot', 'description-proc',
|
|
'deadline-receipt-tender-date-lot', 'deadline-receipt-tender-time-lot',
|
|
'organisation-name-buyer', 'organisation-city-buyer',
|
|
'estimated-value-lot', 'estimated-value-cur-lot',
|
|
'tender-value', 'tender-value-cur',
|
|
'classification-cpv', 'contract-nature',
|
|
'winner-name', 'winner-city', 'winner-identifier',
|
|
'document-url-lot',
|
|
'award-criterion-name-lot', 'award-criterion-number-weight-lot',
|
|
'guarantee-required-description-lot',
|
|
'duration-period-value-lot',
|
|
'place-performance-street-lot',
|
|
'subcontracting-description',
|
|
'winner-decision-date',
|
|
]
|
|
|
|
import urllib.request
|
|
|
|
|
|
def ted_search(query, page=1, limit=100):
|
|
"""Search TED API."""
|
|
body = json.dumps({
|
|
'query': query,
|
|
'limit': limit,
|
|
'page': page,
|
|
'fields': FIELDS,
|
|
}).encode()
|
|
|
|
req = urllib.request.Request(TED_API, data=body, headers={
|
|
'Content-Type': 'application/json',
|
|
})
|
|
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
return json.loads(resp.read())
|
|
|
|
|
|
def extract_text(val):
|
|
"""Extract Romanian text from TED multilingual field."""
|
|
if val is None:
|
|
return None
|
|
if isinstance(val, dict):
|
|
return val.get('ron', [val.get('eng', [None])])[0] if val else None
|
|
if isinstance(val, list):
|
|
return val[0] if val else None
|
|
return str(val)
|
|
|
|
|
|
def extract_list(val):
|
|
"""Extract list of Romanian texts."""
|
|
if val is None:
|
|
return None
|
|
if isinstance(val, dict):
|
|
items = val.get('ron', val.get('eng', []))
|
|
return items if isinstance(items, list) else [items]
|
|
if isinstance(val, list):
|
|
return val
|
|
return [str(val)]
|
|
|
|
|
|
def parse_notice(notice):
|
|
"""Parse TED notice into our announcement format."""
|
|
pub_number = notice.get('publication-number', '')
|
|
desc = extract_text(notice.get('description-lot')) or extract_text(notice.get('description-proc'))
|
|
buyer_name = extract_text(notice.get('organisation-name-buyer'))
|
|
buyer_city = extract_text(notice.get('organisation-city-buyer'))
|
|
|
|
# CPV
|
|
cpv_list = notice.get('classification-cpv', [])
|
|
cpv_code = cpv_list[0] if cpv_list else None
|
|
|
|
# Values
|
|
est_values = notice.get('estimated-value-lot', [])
|
|
est_value = float(est_values[0]) if est_values else None
|
|
tender_values = notice.get('tender-value', [])
|
|
tender_value = float(tender_values[0]) if tender_values else None
|
|
|
|
# Deadline
|
|
deadlines = notice.get('deadline-receipt-tender-date-lot', [])
|
|
deadline = deadlines[0] if deadlines else None
|
|
|
|
# Winner — can be list, dict, or string
|
|
winner_name = extract_text(notice.get('winner-name'))
|
|
winner_cui = extract_text(notice.get('winner-identifier'))
|
|
winner_city = extract_text(notice.get('winner-city'))
|
|
|
|
# Documents
|
|
doc_urls = notice.get('document-url-lot', [])
|
|
documents = [{'url': u} for u in doc_urls] if doc_urls else None
|
|
|
|
# Criteria
|
|
crit_names = extract_list(notice.get('award-criterion-name-lot'))
|
|
crit_weights = notice.get('award-criterion-number-weight-lot', [])
|
|
criteria = None
|
|
if crit_names:
|
|
criteria = []
|
|
for i, name in enumerate(crit_names):
|
|
weight = crit_weights[i] if i < len(crit_weights) else None
|
|
criteria.append({'name': name, 'weight': weight})
|
|
|
|
# Duration
|
|
durations = notice.get('duration-period-value-lot', [])
|
|
duration = durations[0] if durations else None
|
|
|
|
# Contract nature
|
|
natures = notice.get('contract-nature', [])
|
|
contract_type = natures[0] if natures else None
|
|
type_map = {'services': 'Servicii', 'supplies': 'Furnizare', 'works': 'Lucrări'}
|
|
contract_type = type_map.get(contract_type, contract_type)
|
|
|
|
# Guarantee
|
|
guarantee = extract_text(notice.get('guarantee-required-description-lot'))
|
|
|
|
# Links
|
|
ted_url = None
|
|
links = notice.get('links', {})
|
|
html_links = links.get('html', {})
|
|
ted_url = html_links.get('RON') or html_links.get('ENG')
|
|
xml_url = links.get('xml', {}).get('MUL')
|
|
|
|
return {
|
|
'type': 'ted_notice',
|
|
'ref_number': f'TED-{pub_number}',
|
|
'authority_name': buyer_name,
|
|
'authority_cui': None, # TED doesn't have CUI directly
|
|
'title': (desc or '')[:500] if desc else None,
|
|
'description': desc,
|
|
'cpv_code': cpv_code,
|
|
'contract_type': contract_type,
|
|
'publication_date': notice.get('publication-date'),
|
|
'submission_deadline': deadline,
|
|
'estimated_value': est_value,
|
|
'awarded_value': tender_value,
|
|
'currency': 'RON',
|
|
'supplier_name': winner_name,
|
|
'supplier_cui': winner_cui,
|
|
'documents': json.dumps(documents) if documents else None,
|
|
'award_criteria': json.dumps(criteria) if criteria else None,
|
|
'lots': None,
|
|
'seap_url': ted_url,
|
|
'details': json.dumps({
|
|
'ted_publication_number': pub_number,
|
|
'xml_url': xml_url,
|
|
'duration_days': duration,
|
|
'guarantee': guarantee,
|
|
'buyer_city': buyer_city,
|
|
'winner_city': winner_city,
|
|
'subcontracting': extract_text(notice.get('subcontracting-description')),
|
|
}),
|
|
'source': 'ted',
|
|
}
|
|
|
|
|
|
def main():
|
|
year = sys.argv[1] if len(sys.argv) > 1 else '2026'
|
|
conn = psycopg2.connect(DB_URL)
|
|
cur = conn.cursor()
|
|
|
|
query = f'CY=ROU AND PD>{year}0101'
|
|
print(f'\n=== TED Import — Romania {year} — {datetime.now().isoformat()} ===')
|
|
|
|
# Get total count first
|
|
result = ted_search(query, page=1, limit=1)
|
|
total = result.get('totalNoticeCount', 0)
|
|
print(f'Total notices: {total}')
|
|
|
|
page = 1
|
|
limit = 100
|
|
inserted = 0
|
|
skipped = 0
|
|
|
|
while True:
|
|
print(f' Page {page}...')
|
|
result = ted_search(query, page=page, limit=limit)
|
|
notices = result.get('notices', [])
|
|
|
|
if not notices:
|
|
break
|
|
|
|
for notice in notices:
|
|
parsed = parse_notice(notice)
|
|
if not parsed['ref_number']:
|
|
skipped += 1
|
|
continue
|
|
|
|
try:
|
|
cur.execute("""
|
|
INSERT INTO seap.announcements
|
|
(type, ref_number, authority_name, authority_cui,
|
|
title, description, cpv_code, contract_type,
|
|
publication_date, submission_deadline,
|
|
estimated_value, awarded_value, currency,
|
|
supplier_name, supplier_cui,
|
|
documents, award_criteria, lots,
|
|
seap_url, details, source)
|
|
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s,
|
|
%s::timestamptz, %s, %s, %s, %s, %s,
|
|
%s::jsonb, %s::jsonb, %s::jsonb,
|
|
%s, %s::jsonb, %s)
|
|
ON CONFLICT (type, ref_number) DO UPDATE SET
|
|
description = EXCLUDED.description,
|
|
awarded_value = COALESCE(EXCLUDED.awarded_value, seap.announcements.awarded_value),
|
|
supplier_name = COALESCE(EXCLUDED.supplier_name, seap.announcements.supplier_name),
|
|
supplier_cui = COALESCE(EXCLUDED.supplier_cui, seap.announcements.supplier_cui),
|
|
documents = COALESCE(EXCLUDED.documents, seap.announcements.documents),
|
|
award_criteria = COALESCE(EXCLUDED.award_criteria, seap.announcements.award_criteria),
|
|
details = EXCLUDED.details,
|
|
enriched_at = now()
|
|
""", (
|
|
parsed['type'], parsed['ref_number'], parsed['authority_name'],
|
|
parsed['authority_cui'], parsed['title'], parsed['description'],
|
|
parsed['cpv_code'], parsed['contract_type'],
|
|
parsed['publication_date'], parsed['submission_deadline'],
|
|
parsed['estimated_value'], parsed['awarded_value'], parsed['currency'],
|
|
parsed['supplier_name'], parsed['supplier_cui'],
|
|
parsed['documents'], parsed['award_criteria'], parsed['lots'],
|
|
parsed['seap_url'], parsed['details'], parsed['source'],
|
|
))
|
|
inserted += 1
|
|
except Exception as e:
|
|
conn.rollback()
|
|
skipped += 1
|
|
if inserted < 5:
|
|
print(f' Error: {e}')
|
|
continue
|
|
|
|
conn.commit()
|
|
print(f' Inserted: {inserted}, Skipped: {skipped}')
|
|
|
|
if len(notices) < limit:
|
|
break
|
|
|
|
page += 1
|
|
time.sleep(0.5) # Be polite
|
|
|
|
# Try to match buyer names to CUI via cui_location
|
|
print('\nMatching TED buyers to CUI...')
|
|
cur.execute("""
|
|
UPDATE seap.announcements a
|
|
SET authority_cui = cl.cui,
|
|
authority_siruta = cl.siruta
|
|
FROM seap.cui_location cl
|
|
WHERE a.type = 'ted_notice'
|
|
AND a.authority_cui IS NULL
|
|
AND a.authority_name IS NOT NULL
|
|
AND seap.normalize_locality(cl.name) = seap.normalize_locality(a.authority_name)
|
|
""")
|
|
name_matched = cur.rowcount
|
|
print(f' Matched by name: {name_matched}')
|
|
|
|
# Match supplier CUI
|
|
cur.execute("""
|
|
UPDATE seap.announcements a
|
|
SET supplier_siruta = cl.siruta
|
|
FROM seap.cui_location cl
|
|
WHERE a.type = 'ted_notice'
|
|
AND a.supplier_cui = cl.cui
|
|
AND cl.siruta IS NOT NULL
|
|
AND a.supplier_siruta IS NULL
|
|
""")
|
|
sup_matched = cur.rowcount
|
|
print(f' Supplier SIRUTA: {sup_matched}')
|
|
conn.commit()
|
|
|
|
print(f'\n=== Done: {inserted} imported, {skipped} skipped ===')
|
|
conn.close()
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|