Files
vreau-digital/services/seap-scraper/wsp/client.py
T
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

225 lines
9.0 KiB
Python

"""
WSP SOAP client — envelope construction + HTTPS+mTLS POST + response parsing.
Critical, non-negotiable rules (validated empirically against e-licitatie.ro:8883):
1. Endpoint URL is case-sensitive: /Pub (capital P).
2. Field children of <tem:request> MUST be in alphabetic order (WCF DataContract).
3. SeapUserCredentials xmlns="http://tempuri.org" (NO trailing slash).
4. SOAPAction header must be quoted: "http://tempuri.org/{contract}/{op}".
5. None values must be encoded as <sic:Field i:nil="true"/>, not omitted —
omitting an alphabetic-order intermediate field causes WCF to silently
null-out subsequent fields (validation cascades).
"""
from __future__ import annotations
import os
import time
from dataclasses import dataclass
from typing import Any, Optional
import requests
from lxml import etree
from .operations import WspOp, PROD_URL, NS_INTEG
# XML namespaces used when building requests and reading responses.
# Default namespace placed on the <SeapUserCredentials> SOAP header element.
NS_TEM = 'http://tempuri.org' # NOTE: no trailing slash for SeapUserCredentials
# Namespace bound to the `tem:` prefix wrapping <tem:{op}> and <tem:request>.
NS_TEM_BODY = 'http://tempuri.org/' # WITH slash for tem:Op body
# XML Schema instance namespace; bound to the `i:` prefix used for i:nil="true".
NS_XSI = 'http://www.w3.org/2001/XMLSchema-instance'
# SOAP 1.1 envelope namespace; bound to the `soapenv:` prefix.
NS_SOAP = 'http://schemas.xmlsoap.org/soap/envelope/'
# Response namespaces for status extraction
# NS_INTEG in '{namespace}' form, prepended to the Status/Description tag
# names when searching the parsed response tree.
NS_INTEG_BRACED = '{%s}' % NS_INTEG
def _envelope(op: WspOp, fields: dict[str, Any], username: str, password: str) -> bytes:
    """Build a WCF-compatible SOAP envelope for one operation.

    Children of <tem:request> are emitted in sorted (alphabetic) order of
    their field names. Value encoding:

    * ``None``  -> ``<sic:Field i:nil="true"/>`` (never omitted).
    * ``bool``  -> ``true`` / ``false``, the XML Schema boolean lexical form
      (``str(True)`` would yield ``True``, which is not a valid xs:boolean).
    * anything else -> ``str(value)``, XML-escaped.

    Args:
        op: operation descriptor; ``op.name`` and ``op.request_ns`` are read.
        fields: mapping of request field name to value.
        username: value for the <Username> credential header.
        password: value for the <Password> credential header.

    Returns:
        The envelope encoded as UTF-8 bytes.
    """
    def _field_xml(name: str) -> str:
        # Render a single <sic:name> child of <tem:request>.
        val = fields[name]
        if val is None:
            return f' <sic:{name} i:nil="true"/>'
        if isinstance(val, bool):
            # Must be checked explicitly: str(True) == 'True' is not xs:boolean.
            text = 'true' if val else 'false'
        else:
            text = str(val)
        return f' <sic:{name}>{_xml_escape(text)}</sic:{name}>'

    # Joined outside the f-string so no backslash is needed inside a
    # replacement field (not allowed before Python 3.12).
    request_children = '\n'.join(_field_xml(name) for name in sorted(fields))
    body = f'''<soapenv:Envelope xmlns:soapenv="{NS_SOAP}" xmlns:tem="{NS_TEM_BODY}" xmlns:sic="{op.request_ns}" xmlns:i="{NS_XSI}">
<soapenv:Header>
<SeapUserCredentials xmlns="{NS_TEM}" xmlns:i="{NS_XSI}">
<Password xmlns="{NS_INTEG}">{_xml_escape(password)}</Password>
<Username xmlns="{NS_INTEG}">{_xml_escape(username)}</Username>
</SeapUserCredentials>
</soapenv:Header>
<soapenv:Body>
<tem:{op.name}>
<tem:request>
{request_children}
</tem:request>
</tem:{op.name}>
</soapenv:Body>
</soapenv:Envelope>'''
    return body.encode('utf-8')
def _xml_escape(s: str) -> str:
    """Return *s* with the five XML special characters replaced by entities.

    ``&``, ``<``, ``>``, ``"`` and ``'`` become ``&amp;``, ``&lt;``,
    ``&gt;``, ``&quot;`` and ``&apos;``; every other character is kept.
    """
    entity_for = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;',
    }
    # Each input character is mapped independently in a single pass, so an
    # ampersand produced by a replacement is never escaped a second time.
    return s.translate(str.maketrans(entity_for))
@dataclass
class WspResult:
    """Outcome of a single WSP SOAP call: status, paging counters and payload."""

    # 'Success' | 'ValidationError' | 'SystemError' | 'AuthFailed' | etc
    status: str
    description: Optional[str]
    page_index: int
    # Total ITEM count (not a page count); num_pages derives pages from it.
    page_total: int
    # Raw serialized <Items> element handed to the parser.
    items_xml: bytes
    # Full response envelope (for debugging / saving).
    raw_envelope: bytes
    elapsed_ms: int
    # Populated by the paginator after the parser runs.
    items: Optional[list] = None
    # Items per page; the default of 50 was confirmed empirically.
    page_size: int = 50

    @property
    def success(self) -> bool:
        """True only when the status is exactly 'Success'."""
        return self.status == 'Success'

    @property
    def num_pages(self) -> int:
        """Number of pages needed to hold page_total items (0 if none)."""
        if not self.page_total > 0:
            return 0
        # A falsy page_size (0 / None) falls back to the default of 50.
        per_page = self.page_size if self.page_size else 50
        # Integer ceiling division.
        return (self.page_total + per_page - 1) // per_page
class WspClient:
    """SOAP client for SEAP WSP that reuses a single HTTPS session.

    The client stores only the session, credentials and endpoint; calls do
    not mutate the instance.
    NOTE(review): concurrent use from several threads is only as safe as the
    shared ``requests.Session`` — confirm before sharing one client.
    """

    def __init__(self, session: requests.Session, username: str, password: str,
                 endpoint: str = PROD_URL):
        """Store the HTTP session, SEAP credentials and endpoint URL."""
        self.session = session
        self.username = username
        self.password = password
        self.endpoint = endpoint

    def call(self, op: WspOp, fields: dict[str, Any], timeout: int = 120,
             max_retries: int = 3) -> WspResult:
        """Execute a SOAP call, retrying transient failures with backoff.

        Args:
            op: operation descriptor (``name`` and ``soap_action`` are read
                here; the envelope builder reads more).
            fields: request fields passed to the envelope builder.
            timeout: per-attempt timeout handed to ``session.post``.
            max_retries: maximum number of attempts.

        Returns:
            The parsed response, or a fault result when the server answers a
            retryable status code with a SOAP fault body.

        Raises:
            RuntimeError: every attempt hit a timeout, a connection error or
                a retryable HTTP status without a SOAP fault.
            requests.HTTPError: the server returned any other error status.
        """
        body = _envelope(op, fields, self.username, self.password)
        headers = {
            'Content-Type': 'text/xml; charset=utf-8',
            'SOAPAction': f'"{op.soap_action}"',
        }
        last_exc: Optional[Exception] = None
        last_err: Optional[str] = None
        for attempt in range(max_retries):
            try:
                # Monotonic clock: elapsed time must not jump with wall-clock
                # adjustments.
                t0 = time.monotonic()
                resp = self.session.post(self.endpoint, data=body,
                                         headers=headers, timeout=timeout)
                elapsed_ms = int((time.monotonic() - t0) * 1000)
                if resp.status_code in (429, 500, 502, 503, 504):
                    # 500 includes WCF ActionNotSupported faults — but those
                    # are deterministic, so don't retry. Differentiate via body.
                    if b'<faultcode' in resp.content:
                        # SOAP fault — return as parsed error, no retry
                        return self._parse_fault(resp.content, elapsed_ms)
                    # Transient status: remember it so the final error
                    # message names the cause instead of "None".
                    last_exc = None
                    last_err = f'HTTP {resp.status_code}'
                else:
                    resp.raise_for_status()
                    return self._parse_response(resp.content, elapsed_ms)
            except (requests.Timeout, requests.ConnectionError) as e:
                last_exc = e
                last_err = str(e)
            # Exponential backoff, but never sleep after the final attempt.
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
        raise RuntimeError(f'Exhausted retries on {op.name}: {last_err}') from last_exc

    @staticmethod
    def _extract_envelope(content: bytes) -> bytes:
        """Cut the <s:Envelope>…</s:Envelope> span out of a raw response body.

        Handles bodies that start with a multipart boundary as well as
        bodies where the envelope is preceded or followed by other bytes.
        Content is returned unchanged when no envelope span can be located.
        """
        open_tag = b'<s:Envelope'
        close_tag = b'</s:Envelope>'
        if content.startswith((b'\r\n--', b'--')):
            # Multipart — find the XML part
            start = content.find(open_tag)
            if start == -1:
                start = content.find(b'<?xml')
            if start != -1:
                end = content.find(close_tag)
                if end != -1:
                    return content[start:end + len(close_tag)]
            return content
        # Sometimes the multipart starts with --uuid... mid-stream
        start = content.find(open_tag)
        if start > 0:
            content = content[start:]
        end = content.find(close_tag)
        if end != -1:
            content = content[:end + len(close_tag)]
        return content

    def _parse_response(self, content: bytes, elapsed_ms: int) -> WspResult:
        """Extract Status, Description, PageIndex, PageTotal and Items.

        SEAP returns MTOM/XOP multipart sometimes — the wrapping is stripped
        before parsing. A body that is not well-formed XML yields a result
        with status 'ParseError' instead of raising.
        """
        content = self._extract_envelope(content)
        try:
            root = etree.fromstring(content)
        except etree.XMLSyntaxError as e:
            return WspResult(
                status='ParseError', description=str(e),
                page_index=0, page_total=0,
                items_xml=b'', raw_envelope=content, elapsed_ms=elapsed_ms,
            )
        status_el = root.find(f'.//{NS_INTEG_BRACED}Status')
        desc_el = root.find(f'.//{NS_INTEG_BRACED}Description')
        # PageIndex/PageTotal/Items live in a namespace that varies per
        # response type, so match on local name only. One pass over the tree;
        # tag=etree.Element skips comments and processing instructions, whose
        # .tag is not a string and cannot be turned into a QName.
        page_index = 0
        page_total = 0
        items_el = None
        for el in root.iter(tag=etree.Element):
            local = etree.QName(el.tag).localname
            if local == 'Items':
                if items_el is None:  # keep the first Items in document order
                    items_el = el
            elif local in ('PageIndex', 'PageTotal'):
                text = (el.text or '').strip()
                # isdecimal() accepts only characters that int() can parse.
                if text.isdecimal():
                    if local == 'PageIndex':
                        page_index = int(text)
                    else:
                        page_total = int(text)
        items_xml = etree.tostring(items_el) if items_el is not None else b''
        # An absent or empty <Status> is reported as 'UNKNOWN' so that status
        # is always a string.
        has_status = status_el is not None and status_el.text
        return WspResult(
            status=status_el.text if has_status else 'UNKNOWN',
            description=desc_el.text if desc_el is not None else None,
            page_index=page_index,
            page_total=page_total,
            items_xml=items_xml,
            raw_envelope=content,
            elapsed_ms=elapsed_ms,
        )

    def _parse_fault(self, content: bytes, elapsed_ms: int) -> WspResult:
        """Turn a SOAP fault body into a WspResult with status 'Fault:<code>'.

        When the body cannot be parsed the status is 'Fault:Parse' and the
        description holds the first 200 bytes of the body.
        """
        try:
            root = etree.fromstring(content)
            faultcode = root.find('.//{*}faultcode')
            faultstring = root.find('.//{*}faultstring')
            status = f'Fault:{faultcode.text}' if faultcode is not None else 'Fault'
            desc = faultstring.text if faultstring is not None else None
        except Exception:
            # Deliberate best-effort: an unreadable fault body must still
            # produce a result rather than an exception.
            status = 'Fault:Parse'
            desc = content[:200].decode('utf-8', errors='replace')
        return WspResult(
            status=status, description=desc,
            page_index=0, page_total=0,
            items_xml=b'', raw_envelope=content, elapsed_ms=elapsed_ms,
        )
def make_client_from_env(endpoint: str = PROD_URL) -> WspClient:
    """Create a WspClient from environment configuration.

    Reads SEAP_USER and SEAP_PASS from the environment and obtains the HTTP
    session from ``cert_loader.make_mtls_session_from_env`` (presumably where
    SEAP_CERT_KEY is consumed — it is not read here).

    Raises:
        RuntimeError: SEAP_USER or SEAP_PASS is missing or empty.
    """
    # Imported lazily, inside the function, exactly as the original did.
    from .cert_loader import make_mtls_session_from_env

    username = os.environ.get('SEAP_USER')
    password = os.environ.get('SEAP_PASS')
    if not (username and password):
        raise RuntimeError('SEAP_USER / SEAP_PASS not in env')
    # The session is only built once the credentials are known to exist.
    return WspClient(make_mtls_session_from_env(), username, password,
                     endpoint=endpoint)