"""
Base notice parser — shared logic for CN, PI, RFQ, DC, PC, Rdc, EN notices.

CA notice has more complex structure (lots + winners) so it has its own parser.
"""
from __future__ import annotations

from lxml import etree

from ..xml_utils import (
    find_child_local, find_local, find_path,
    text_under, text_direct, int_under, decimal_under, bool_under, datetime_under,
    sysitem_name, sysitem_id,
)

# Element names that may carry the notice number, tried in this order; the
# first non-empty value wins.
_NOTICE_NUMBER_FIELDS = (
    'NoticeNo',
    'CNoticeNumber',
    'PiNoticeNumber',
    'NoticeNumber',
    'RFQInvitationNumber',
    'RFQNoticeNumber',
    'DCNoticeNumber',
    'PCNoticeNumber',
    'RDCNoticeNumber',
    'ENoticeNumber',
    'EAProcedureNumber',
    'DfNoticeNo',  # last-resort fallback
)

# Element names that may carry the numeric notice ID, tried in this order
# (after the caller-supplied ``notice_id_field``, when one is given).
_NOTICE_ID_FIELDS = (
    'CNoticeId',
    'PiNoticeId',
    'NoticeId',
    'CaNoticeId',
    'DCNoticeId',
    'PCNoticeId',
    'RFQNoticeId',
    'RdcNoticeId',
    'ENoticeId',
    'EAProcedureId',
    'RFQInvitationId',
)

# Containers under <General> that are scanned for attached files.
_DOCUMENT_CONTAINERS = ('NoticeFiles', 'CompanyFiles', 'DfNoticeFiles')


def _first_text(el, names):
    """Return the first truthy ``text_under(el, name)`` for *names*, else None."""
    for name in names:
        value = text_under(el, name)
        if value:
            return value
    return None


def _first_int(el, names):
    """Return the first non-None ``int_under(el, name)`` for *names*, else None.

    Falsy names (e.g. an unset ``notice_id_field``) are skipped.
    """
    for name in names:
        if not name:
            continue
        value = int_under(el, name)
        if value is not None:
            return value
    return None


def parse_basic_notice(el, *, type_tag: str, source_tag: str,
                       notice_id_field: str | None = None) -> dict | None:
    """Generic notice parser — works for CN, PI, DC, PC, Rdc, EN, RFQ.

    el: the notice element to parse.
    type_tag: short type identifier for the row (e.g. 'c_notice', 'pi_notice')
    source_tag: value for source column (e.g. 'wsp_cnotice')
    notice_id_field: local name of the ID field (CNoticeId, PiNoticeId, etc.)
                     If None, auto-detect by trying common names.

    Returns a flat dict describing the notice (with nested 'details',
    'documents' and 'lots' entries), or None when no notice number can be
    found under any of the known field names.
    """
    notice_no = _first_text(el, _NOTICE_NUMBER_FIELDS)
    if not notice_no:
        return None

    # The explicit field (if any) is tried first, then the well-known names.
    notice_id = _first_int(el, (notice_id_field, *_NOTICE_ID_FIELDS))

    general = find_child_local(el, 'General')
    section1 = find_child_local(el, 'Section1')
    section2 = find_child_local(el, 'Section2')
    section4 = find_child_local(el, 'Section4')

    # Authority — prefer Section1_1/CaAddresses, fall back to Section1_1 itself.
    auth_addresses = find_path(section1, 'Section1_1', 'CaAddresses')
    if auth_addresses is None:
        auth_addresses = find_path(section1, 'Section1_1')
    auth_info = (find_local(auth_addresses, 'EntityInformation')
                 if auth_addresses is not None else None)

    authority_name = None
    authority_cui = None
    authority_address = None
    authority_email = None
    authority_phone = None
    authority_url = None
    county_code = None
    if auth_info is not None:
        authority_name = text_direct(auth_info, 'Name')
        authority_cui = text_direct(auth_info, 'Cif')
        authority_address = text_direct(auth_info, 'Address')
        authority_email = text_direct(auth_info, 'Email')
        authority_phone = text_direct(auth_info, 'Phone')
        authority_url = text_direct(auth_info, 'Url')
        county_code = sysitem_name(auth_info, 'NutsCode')

    entity_id = int_under(general, 'EntityId') if general is not None else None

    s1_4 = find_child_local(section1, 'Section1_4') if section1 is not None else None
    authority_type = sysitem_name(s1_4, 'ContractingAuthorityType')
    s1_5 = find_child_local(section1, 'Section1_5') if section1 is not None else None
    main_activity = sysitem_name(s1_5, 'MainActivity')

    # Section 2 — contract
    s2_1 = find_child_local(section2, 'Section2_1') if section2 is not None else None
    contract_title = (text_under(general, 'ContractTitle')
                      or text_under(s2_1, 'ContractName')
                      or text_under(s2_1, 'Title'))
    short_desc = text_under(s2_1, 'ShortContractDescription')
    main_cpv_code = sysitem_name(s2_1, 'MainCPV') or sysitem_name(s2_1, 'MainCPVCode')
    main_cpv_id = sysitem_id(s2_1, 'MainCPV') or sysitem_id(s2_1, 'MainCPVCode')
    contract_type = sysitem_name(s2_1, 'SysAcquisitionContractType')
    currency = sysitem_name(s2_1, 'Currency')
    # NOTE(review): ``or`` treats a zero EstimatedValue as missing and falls
    # through to TotalValue — confirm that is intended before changing it.
    estimated_value = (decimal_under(s2_1, 'EstimatedValue')
                       or decimal_under(s2_1, 'TotalValue'))
    has_lots = bool_under(s2_1, 'ContractHasLots')
    reference_number = text_under(s2_1, 'ReferenceNumber')

    # Lots — for CN/RFQ/etc., lots in Section2_2
    lots = _extract_lots_simple(section2)
    lots_count = len(lots) if lots else None

    # Procedure
    s4_1 = find_child_local(section4, 'Section4_1') if section4 is not None else None
    procedure_type = sysitem_name(s4_1, 'SysProcedureType')
    framework_agreement = bool_under(s4_1, 'FrameworkAgreement')

    # Section 4_2 — deadlines
    s4_2 = find_child_local(section4, 'Section4_2') if section4 is not None else None
    deadline_submission = (datetime_under(s4_2, 'TenderAvailabilityDeadline')
                           or datetime_under(s4_2, 'ReceiptTimeLimit')
                           or datetime_under(s4_2, 'ReceiptDeadline'))
    opening_date = datetime_under(s4_2, 'TenderOpeningDate')

    # Dates + state
    publication_date = datetime_under(general, 'PublishDate')
    legislation = (sysitem_name(general, 'SysLegislationType')
                   or sysitem_name(general, 'LegislationType'))
    notice_state = sysitem_name(general, 'SysNoticeState')
    notice_state_id = sysitem_id(general, 'SysNoticeState')
    is_utility = bool_under(general, 'IsUtility')
    notice_no_joue = (text_under(general, 'NoticeNoJoue')
                      or text_under(general, 'JOUEPublicationNumber'))

    # Documents
    documents = _extract_documents(general)

    return {
        'type': type_tag,
        'ref_number': f'WSP-{notice_no}',
        'authority_name': authority_name,
        'authority_cui': authority_cui,
        'authority_address': authority_address,
        'authority_email': authority_email,
        'authority_phone': authority_phone,
        'authority_url': authority_url,
        'authority_type': authority_type,
        'authority_main_activity': main_activity,
        'authority_entity_id': entity_id,
        # Titles are capped at 1000 characters.
        'title': contract_title[:1000] if contract_title else None,
        'cpv_code': main_cpv_code,
        'contract_type': contract_type,
        'publication_date': publication_date,
        'estimated_value': estimated_value,
        # Award/supplier fields are always None for these notice types.
        'awarded_value': None,
        'currency': currency,
        'supplier_name': None,
        'supplier_cui': None,
        'procedure_type': procedure_type,
        'procedure_state': notice_state,
        'legislation': legislation,
        # Tri-state: 'da' for truthy, 'nu' for an explicit False, else None.
        'has_lots': 'da' if has_lots else 'nu' if has_lots is False else None,
        'contract_has_lots': has_lots,
        'lots_count': lots_count,
        'joue': notice_no_joue,
        'county_code': county_code,
        'notice_state': notice_state,
        'notice_state_id': notice_state_id,
        'framework_agreement': framework_agreement,
        'notice_id_internal': notice_id,
        'deadline_submission': deadline_submission,
        'opening_date': opening_date,
        # Empty lists are normalised to None.
        'documents': documents or None,
        'lots': lots or None,
        'details': {
            'short_description': short_desc,
            'reference_number': reference_number,
            'main_cpv_id': main_cpv_id,
            'is_utility': is_utility,
        },
        'source': source_tag,
    }


def _extract_lots_simple(section2) -> list[dict]:
    """Extract the lots listed under *section2*.

    Locates a ``Lots`` element via ``find_local`` and walks its direct
    children, keeping those whose local name is ``LotInfo``. Each lot becomes
    a dict from which None-valued fields are dropped. Returns an empty list
    when *section2* is None or has no ``Lots`` element.
    """
    if section2 is None:
        return []
    lots_list = find_local(section2, 'Lots')
    if lots_list is None:
        return []

    out = []
    for lot in lots_list:
        # Comments and processing instructions have a non-string ``tag`` in
        # lxml; passing it to QName would raise, so skip such nodes.
        if not isinstance(lot.tag, str):
            continue
        if etree.QName(lot.tag).localname != 'LotInfo':
            continue
        lot_data = {
            'lot_id': int_under(lot, 'LotID'),
            'lot_no': int_under(lot, 'LotNo'),
            'title': text_under(lot, 'Title'),
            'description': text_under(lot, 'DescriptionOfProcurement'),
            'cpv_code': sysitem_name(lot, 'MainCPVCode'),
            'estimated_value': _str_decimal(decimal_under(lot, 'EstimatedValue')),
            'duration_months': int_under(lot, 'DurationInMonths'),
            'duration_days': int_under(lot, 'DurationInDays'),
            'currency': sysitem_name(lot, 'Currency'),
            'place_of_performance': text_under(lot, 'MainSiteOrPlaceOfPerformance'),
            'is_community_financed': bool_under(lot, 'IsCommunityFinanced'),
        }
        out.append({k: v for k, v in lot_data.items() if v is not None})
    return out


def _extract_documents(general) -> list[dict]:
    """Collect attached-file entries from the file containers under *general*.

    For each container in ``_DOCUMENT_CONTAINERS``, every child is treated as
    a key/value pair: the key element ('key' or 'Key') supplies the document
    name and the value element ('value' or 'Value') supplies its id. Children
    missing either element are skipped. Returns an empty list when *general*
    is None.
    """
    if general is None:
        return []

    out = []
    for fld in _DOCUMENT_CONTAINERS:
        container = find_local(general, fld)
        if container is None:
            continue
        for kvp in container:
            # Both lower- and upper-case element names are accepted.
            key = find_local(kvp, 'key')
            if key is None:
                key = find_local(kvp, 'Key')
            val = find_local(kvp, 'value')
            if val is None:
                val = find_local(kvp, 'Value')
            if key is not None and val is not None:
                out.append({'type': fld, 'name': key.text, 'id': val.text})
    return out


def _str_decimal(d):
    """Return ``str(d)``, or None when *d* is None."""
    return str(d) if d is not None else None