"""XML extraction helpers for WSP parsers — namespace-agnostic via local-name match."""
from __future__ import annotations

import re
from datetime import datetime
from decimal import Decimal, InvalidOperation
from typing import Any, Optional

from lxml import etree

# Attribute (in Clark notation) that marks an element as explicitly nil.
_XSI_NIL = '{http://www.w3.org/2001/XMLSchema-instance}nil'

# Lexical forms of xs:boolean "true" (compared after lower-casing).
_TRUE_LITERALS = frozenset({'true', '1'})

# First fractional-seconds group in a timestamp, e.g. ".1234567".
_FRACTION_RE = re.compile(r'\.(\d+)')


def _local_name(node) -> Optional[str]:
    """Return the namespace-free tag name of *node*.

    Returns None for nodes whose ``tag`` is not a string (lxml reports
    comments and processing instructions that way), so callers can skip
    them instead of failing inside ``etree.QName``.
    """
    tag = node.tag
    if not isinstance(tag, str):
        return None
    return etree.QName(tag).localname


def _clean_text(node) -> Optional[str]:
    """Return the stripped ``.text`` of *node*, or None if missing or blank."""
    if node is None or node.text is None:
        return None
    stripped = node.text.strip()
    return stripped if stripped else None


def find_local(el, local_name: str):
    """Find the first node in document order whose local name matches.

    The namespace is ignored. The search uses ``el.iter()``, so *el* itself
    is considered before its descendants. Returns None when *el* is None or
    nothing matches.
    """
    if el is None:
        return None
    for node in el.iter():
        name = _local_name(node)
        if name is not None and name == local_name:
            return node
    return None


def find_child_local(el, local_name: str):
    """Find the first direct child of *el* by local name (any namespace).

    Returns None when *el* is None or no child matches.
    """
    if el is None:
        return None
    for child in el:
        name = _local_name(child)
        if name is not None and name == local_name:
            return child
    return None


def find_path(el, *local_names: str):
    """Walk a path of direct children by local name.

    Example: ``find_path(el, 'General', 'NoticeNo')``. With no names the
    element itself is returned. Returns None as soon as a step is missing.
    """
    cur = el
    for name in local_names:
        cur = find_child_local(cur, name)
        if cur is None:
            return None
    return cur


def text(el, *local_names) -> Optional[str]:
    """Get the stripped text under a (possibly nested) path of local names.

    With no names, the text of *el* itself is used. Returns None when the
    node is missing or its text is empty/whitespace.
    """
    node = find_path(el, *local_names) if local_names else el
    return _clean_text(node)


def text_under(el, local_name: str) -> Optional[str]:
    """Return the stripped text of the first ``find_local`` match, or None."""
    return _clean_text(find_local(el, local_name))


def text_direct(el, local_name: str) -> Optional[str]:
    """Get the stripped text of a DIRECT child by local name (not descendants).

    Use this for EntityInformation.Name/Address/Email/Phone, where
    SysItem-style children (City, Country, NutsCode) also contain a nested
    ``Name`` element that a descendant search could return instead.
    """
    return _clean_text(find_child_local(el, local_name))


def int_under(el, local_name: str) -> Optional[int]:
    """Return the first matching node's text as an int, or None if absent/invalid."""
    s = text_under(el, local_name)
    if s is None:
        return None
    try:
        return int(s)
    except ValueError:
        return None


def decimal_under(el, local_name: str) -> Optional[Decimal]:
    """Return the first matching node's text as a Decimal, or None if absent/invalid."""
    s = text_under(el, local_name)
    if s is None:
        return None
    try:
        return Decimal(s)
    except (InvalidOperation, ValueError):
        return None


def bool_under(el, local_name: str) -> Optional[bool]:
    """Return the first matching node's text as a bool.

    None when the node is missing or blank; True for the xs:boolean true
    literals ``true`` (case-insensitive) and ``1``; False for any other text.
    """
    s = text_under(el, local_name)
    if s is None:
        return None
    return s.lower() in _TRUE_LITERALS


def datetime_under(el, local_name: str) -> Optional[datetime]:
    """Parse an ISO datetime from the first node matching *local_name*.

    The node's own text is tried first. If that is absent or unparsable,
    a WCF-style wrapper is handled too: the first nested element whose
    local name is ``DateTime`` is parsed instead. Returns None when neither
    yields a valid datetime.
    """
    n = find_local(el, local_name)
    if n is None:
        return None

    direct = _clean_text(n)
    if direct is not None:
        try:
            return _parse_iso(direct)
        except ValueError:
            pass  # not a plain timestamp; try the wrapped form below

    wrapped = _clean_text(find_local(n, 'DateTime'))
    if wrapped is None:
        return None
    try:
        return _parse_iso(wrapped)
    except ValueError:
        return None


def _normalize_fraction(match) -> str:
    """Truncate or zero-pad a fractional-seconds match to exactly 6 digits."""
    return '.' + match.group(1)[:6].ljust(6, '0')


def _parse_iso(s: str) -> datetime:
    """Parse various ISO 8601 forms returned by SEAP.

    A trailing ``Z`` is rewritten to ``+00:00`` and fractional seconds are
    normalised to microsecond precision, because ``datetime.fromisoformat``
    before Python 3.11 rejects both ``Z`` and fractions that are not exactly
    3 or 6 digits long.

    Raises:
        ValueError: if the string is not a recognisable ISO datetime.
    """
    s = s.strip()
    if s.endswith('Z'):
        s = s[:-1] + '+00:00'
    s = _FRACTION_RE.sub(_normalize_fraction, s, count=1)
    return datetime.fromisoformat(s)


def sysitem_under(el, local_name: str) -> Optional[dict]:
    """Extract a SysItem-style {Id, Name} structure.

    Returns a dict with an ``id`` key (int when the text is numeric,
    otherwise the stripped text) and/or a ``name`` key (stripped text).
    Blank values are omitted. Returns None when the node is missing or
    neither value is present.
    """
    n = find_local(el, local_name)
    if n is None:
        return None

    out: dict = {}
    raw_id = _clean_text(find_local(n, 'Id'))
    if raw_id is not None:
        try:
            out['id'] = int(raw_id)
        except ValueError:
            out['id'] = raw_id  # keep non-numeric identifiers as text

    name = _clean_text(find_local(n, 'Name'))
    if name is not None:
        out['name'] = name

    return out or None


def sysitem_name(el, local_name: str) -> Optional[str]:
    """Return the ``name`` of the SysItem under *local_name*, or None."""
    item = sysitem_under(el, local_name)
    return item.get('name') if item else None


def sysitem_id(el, local_name: str) -> Optional[int]:
    """Return the integer ``id`` of the SysItem under *local_name*.

    Returns None when the item is missing or its id is not an integer.
    """
    item = sysitem_under(el, local_name)
    if not item:
        return None
    val = item.get('id')
    return val if isinstance(val, int) else None


def to_jsonable(el) -> Any:
    """Convert an element subtree into nested JSON-friendly data for JSONB storage.

    Strips namespaces and keeps local names. Returns:
      * ``{}`` when *el* is None;
      * None for an element flagged ``xsi:nil`` or a leaf with blank text;
      * the stripped text for a leaf element;
      * a dict keyed by child local name otherwise, where children that
        convert to None are dropped and repeated same-name children are
        collected into a list.

    Comments and processing instructions are ignored.
    """
    if el is None:
        return {}
    # xsi:nil is an xs:boolean, so both "true" and "1" mean nil.
    if el.get(_XSI_NIL) in ('true', '1'):
        return None

    # Only real elements have string tags; skip comments / PIs.
    children = [c for c in el if isinstance(c.tag, str)]
    if not children:
        return _clean_text(el)

    out: dict = {}
    for child in children:
        val = to_jsonable(child)
        if val is None:
            continue
        key = etree.QName(child.tag).localname
        if key not in out:
            out[key] = val
            continue
        # Second occurrence of a key: promote the stored value to a list.
        # Converted values are only ever str or dict, so a list here always
        # means "already promoted".
        if not isinstance(out[key], list):
            out[key] = [out[key]]
        out[key].append(val)
    return out