#!/usr/bin/env python3
"""
SEAP WSP validation suite — runs all tests needed before building the production sync.

Reads SEAP_USER/SEAP_PASS/SEAP_CERT_KEY from env (loaded from Infisical /seap),
extracts cert+key transiently to /dev/shm, shreds at exit.

Tests:
  T1: every SU_* unprefixed operation (public-data feeds)
  T2: every Su* operation (Beletage-scoped)
  T3: pagination + volume probe (1 / 7 / 30 day windows)
  T4: rate-limit behavior (sustained calls)
  T5: SuContracts for Beletage's own data

Usage:
  source ~/Code/claude-dotfiles/load-infisical-path.sh /seap
  cd services/seap-scraper
  ./.venv/bin/python wsp_validate.py
"""
import atexit
import os
import re
import subprocess
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta
from pathlib import Path
from xml.sax.saxutils import escape

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.ssl_ import create_urllib3_context

PROD_URL = 'https://e-licitatie.ro:8883/Pub'
CERT_DIR = Path('/dev/shm')
CRT_PATH = CERT_DIR / f'wsp_{os.getpid()}.crt'
KEY_PATH = CERT_DIR / f'wsp_{os.getpid()}.key'
P12_PATH = Path(__file__).parent / 'credentials' / '50076FB3826FADA540ACFB19.p12'

# Operations grouped by contract
SUPPLIER_PUB_OPS = [  # SU_* — public data accessible via supplier WSP
    'SU_PiNotices', 'SU_CNotices', 'SU_CaNotices', 'SU_DCNotices', 'SU_PCNotices',
    'SU_RfqInvitations', 'SU_RfqNotices', 'SU_RdcNotices', 'SU_EAProcedure', 'SU_ENotices',
]
SUPPLIER_OWN_OPS = [  # Su* + Catalog_* — Beletage-scoped
    'SuContracts', 'SuInvoices', 'SuDirectAcquisitions', 'Catalog_ListItems',
]

# Operations whose request window is PublicationStartDate / PublicationEndDate.
_PUBLICATION_WINDOW_OPS = frozenset({
    'SU_PiNotices', 'SU_CNotices', 'SU_CaNotices', 'SU_DCNotices', 'SU_PCNotices',
    'SU_RfqNotices', 'SU_ENotices', 'SU_RdcNotices', 'SU_RfqInvitations',
})


def _day_start(d):
    """Format date *d* as the first second of that day (``YYYY-MM-DDT00:00:00``)."""
    return d.strftime('%Y-%m-%dT00:00:00')


def _day_end(d):
    """Format date *d* as the last second of that day (``YYYY-MM-DDT23:59:59``)."""
    return d.strftime('%Y-%m-%dT23:59:59')


# ── Cert handling ──

def setup_certs():
    """Extract the client certificate and the private key from the PKCS#12 bundle.

    Runs ``openssl pkcs12`` twice against ``P12_PATH``: once writing the client
    certificate to ``CRT_PATH`` and once writing the unencrypted private key to
    ``KEY_PATH``. The bundle password is handed to openssl through the
    ``SEAP_CERT_KEY`` environment variable, never on the command line.

    Raises:
        KeyError: ``SEAP_CERT_KEY`` is not set.
        subprocess.CalledProcessError: openssl exited with a non-zero status.
    """
    pwd = os.environ['SEAP_CERT_KEY']
    env = {**os.environ, 'SEAP_CERT_KEY': pwd}
    extractions = (
        (CRT_PATH, ['-clcerts', '-nokeys']),
        (KEY_PATH, ['-nocerts', '-nodes']),
    )
    for out_path, selector in extractions:
        # Create the file owner-only *before* openssl writes into it, so the
        # key is never readable by others under a permissive umask.
        out_path.touch(mode=0o600, exist_ok=True)
        out_path.chmod(0o600)
        subprocess.run(
            ['openssl', 'pkcs12', '-in', str(P12_PATH), *selector,
             '-passin', 'env:SEAP_CERT_KEY', '-out', str(out_path)],
            check=True,
            env=env,
            stderr=subprocess.DEVNULL,
        )
        out_path.chmod(0o600)


def cleanup_certs():
    """Remove the extracted cert/key files, overwriting them with ``shred`` when possible.

    Registered with ``atexit``. ``shred -u`` is attempted first; whatever its
    outcome (missing binary, non-zero exit), the file is unlinked afterwards so
    key material is not left behind.
    """
    for p in (CRT_PATH, KEY_PATH):
        if not p.exists():
            continue
        try:
            subprocess.run(['shred', '-u', str(p)], check=False, stderr=subprocess.DEVNULL)
        except OSError:
            # shred could not be executed; fall through to the plain unlink.
            pass
        p.unlink(missing_ok=True)


atexit.register(cleanup_certs)


# ── SOAP call ──

NS_TEM = 'http://tempuri.org/'
NS_SIC_INTEG = 'http://schemas.datacontract.org/2004/07/SICAP.Service.Integration'
NS_SIC_MODEL = 'http://schemas.datacontract.org/2004/07/SICAP.Supplier.Interface.Model'

# Per-operation namespace overrides (Request type lives in different sub-namespace)
OP_NAMESPACE = {
    'SuContracts': NS_SIC_MODEL + '.Contracts',
    'SuContractDownload': NS_SIC_MODEL + '.Contracts',
    # SuInvoices, SuDirectAcquisitions, Catalog_* stay on base NS_SIC_MODEL
}

# NOTE(review): this template contains no XML elements and does not reference
# the ns_model / ns_integ / op values passed to .format() below — the envelope
# markup looks like it was stripped from this copy of the file. Restore it from
# the original source before running against the service.
ENVELOPE_TPL = ''' {password} {username} {fields} '''


def build_envelope(op, fields_dict, ns_model=NS_SIC_MODEL):
    """Build SOAP envelope with WCF-required alphabetic field order.

    Args:
        op: operation name, passed to the template as ``op``.
        fields_dict: request fields; a ``None`` value produces the "empty"
            field form, anything else is rendered with ``str()``.
        ns_model: namespace passed to the template as ``ns_model``.

    Returns:
        The formatted envelope text. Credentials come from ``SEAP_USER`` /
        ``SEAP_PASS``; they and all field values are XML-escaped so characters
        such as ``&`` or ``<`` cannot break the document.

    Raises:
        KeyError: ``SEAP_USER`` or ``SEAP_PASS`` is not set.
    """
    parts = []
    for name in sorted(fields_dict):  # WCF needs alphabetic order
        val = fields_dict[name]
        # NOTE(review): neither literal below references `name`; the element
        # markup appears to be missing here as well — confirm against the original.
        if val is None:
            parts.append(f' ')
        else:
            parts.append(f' {escape(str(val))}')
    return ENVELOPE_TPL.format(
        ns_model=ns_model,
        ns_integ=NS_SIC_INTEG,
        username=escape(os.environ['SEAP_USER']),
        password=escape(os.environ['SEAP_PASS']),
        op=op,
        fields='\n'.join(parts),
    )


def make_session():
    """Return a ``requests.Session`` configured with the extracted client cert/key."""
    s = requests.Session()
    s.cert = (str(CRT_PATH), str(KEY_PATH))
    return s


def soap_call(session, op, fields, contract='ISupplierWebService', timeout=120):
    """POST one SOAP request to ``PROD_URL``.

    Args:
        session: session returned by ``make_session``.
        op: operation name; also selects a namespace override from ``OP_NAMESPACE``.
        fields: request field dict handed to ``build_envelope``.
        contract: service contract name used in the ``SOAPAction`` header.
        timeout: request timeout in seconds.

    Returns:
        ``(response, elapsed_seconds)`` where the elapsed time covers only the POST.
    """
    ns = OP_NAMESPACE.get(op, NS_SIC_MODEL)
    body = build_envelope(op, fields, ns_model=ns)
    headers = {
        'Content-Type': 'text/xml; charset=utf-8',
        'SOAPAction': f'"http://tempuri.org/{contract}/{op}"',
    }
    # perf_counter is monotonic, so the measurement is immune to clock adjustments.
    t0 = time.perf_counter()
    r = session.post(PROD_URL, data=body.encode(), headers=headers, timeout=timeout)
    elapsed = time.perf_counter() - t0
    return r, elapsed


def parse_response(text):
    """Extract Status, Description, PageIndex, PageTotal from SOAP response.

    Returns:
        dict with keys ``status`` (the matched status, ``FAULT:<text>`` when only
        a fault matched, else ``UNKNOWN``), ``description`` (at most 200 chars,
        or None), ``page_index`` / ``page_total`` (int or None) and ``size``
        (length of *text*).
    """
    # NOTE(review): the patterns below carry no element names (status and fault
    # are identical, as are page index and page total) — the tag prefixes appear
    # to have been stripped from this copy. Restore them from the original.
    status = re.search(r']*>([^<]+)', text)
    desc = re.search(r']*>([^<]*)', text)
    pi = re.search(r'(\d+)', text)
    pt = re.search(r'(\d+)', text)
    fault = re.search(r']*>([^<]+)', text)
    return {
        'status': status.group(1) if status else (f'FAULT:{fault.group(1)}' if fault else 'UNKNOWN'),
        'description': (desc.group(1)[:200] if desc and desc.group(1) else None),
        'page_index': int(pi.group(1)) if pi else None,
        'page_total': int(pt.group(1)) if pt else None,
        'size': len(text),
    }


# ── Test definitions ──

def fields_for(op):
    """Return safe field dict for each operation (alphabetic order auto-applied).

    Public notice feeds get a 7-day window ending today; the Beletage-scoped
    operations get a 365-day window. Unknown operations get ``{'PageIndex': 1}``.
    """
    today = datetime.now().date()
    a_week_ago = today - timedelta(days=7)
    a_year_ago = today - timedelta(days=365)

    # Notice operations and RFQ invitations: PublicationStartDate / PublicationEndDate + PageIndex
    if op in _PUBLICATION_WINDOW_OPS:
        return {
            'PageIndex': 1,
            'PublicationEndDate': _day_end(today),
            'PublicationStartDate': _day_start(a_week_ago),
        }
    # Electronic Auction: separate fields
    if op == 'SU_EAProcedure':
        return {
            'EndDate': _day_end(today),
            'PageIndex': 1,
            'StartDate': _day_start(a_week_ago),
        }
    # Beletage-scoped contracts
    if op == 'SuContracts':
        return {
            'ContractEndDate': _day_end(today),
            'ContractStartDate': _day_start(a_year_ago),
            'PageIndex': 1,
        }
    if op == 'SuInvoices':
        return {
            'MaxDate': _day_end(today),
            'MinDate': _day_start(a_year_ago),
            'PageIndex': 1,
        }
    if op == 'SuDirectAcquisitions':
        return {
            'EndDate': _day_end(today),
            'PageIndex': 1,
            'StartDate': _day_start(a_year_ago),
        }
    if op == 'Catalog_ListItems':
        return {
            'LastUpdateEnd': _day_end(today),
            'LastUpdateStart': _day_start(a_year_ago),
        }
    return {'PageIndex': 1}


# ── Tests ──

def _run_op_table(session, ops):
    """Call every operation in *ops* once and print one table row per operation.

    Returns:
        dict mapping operation name to its ``parse_response`` result, or to
        ``{'status': 'ERROR'}`` when the call raised.
    """
    print(f'{"Op":<22}{"Status":<18}{"PageTotal":<11}{"Size":<10}{"Time":<8}{"Description"}')
    print('-' * 110)
    results = {}
    for op in ops:
        try:
            r, elapsed = soap_call(session, op, fields_for(op))
            parsed = parse_response(r.text)
        except Exception as ex:
            # Deliberate best-effort boundary: one failing operation must not
            # stop the remaining probes; the failure is shown in the table.
            print(f'{op:<22}ERROR - - - {str(ex)[:60]}')
            results[op] = {'status': 'ERROR'}
            continue
        results[op] = parsed
        desc = (parsed['description'] or '')[:50]
        print(f'{op:<22}{parsed["status"]:<18}{str(parsed["page_total"]):<11}'
              f'{parsed["size"]:<10}{elapsed:.1f}s {desc}')
    return results


def t1_supplier_pub_ops(session):
    """T1: call every operation in ``SUPPLIER_PUB_OPS`` and return per-op results."""
    print('\n=== T1: SU_* public-data operations (last 7 days) ===')
    return _run_op_table(session, SUPPLIER_PUB_OPS)


def t2_supplier_own_ops(session):
    """T2: call every operation in ``SUPPLIER_OWN_OPS`` and return per-op results."""
    print('\n=== T2: Su* / Catalog_* Beletage-scoped operations (last year) ===')
    return _run_op_table(session, SUPPLIER_OWN_OPS)


def t3_pagination(session):
    """Probe volume scaling and check PageIndex 0 vs 1."""
    print('\n=== T3: Pagination + volume scaling (SU_CaNotices) ===')
    # 1 day vs 7 days vs 30 days
    today = datetime.now().date()
    windows = [('1 day', today - timedelta(days=1)),
               ('7 days', today - timedelta(days=7)),
               ('30 days', today - timedelta(days=30))]
    for label, start in windows:
        fields = {
            'PageIndex': 1,
            'PublicationEndDate': _day_end(today),
            'PublicationStartDate': _day_start(start),
        }
        r, elapsed = soap_call(session, 'SU_CaNotices', fields)
        p = parse_response(r.text)
        page_total = p['page_total']
        # page_total is None when the response carries no page count; str()
        # keeps the width spec from raising TypeError on None.
        # NOTE(review): the item estimate assumes 100 items per page — confirm.
        print(f' {label:<10} PageTotal={str(page_total):<6} '
              f'~{(page_total or 0) * 100:>8} items '
              f'page1 size={p["size"]/1024:.0f}KB {elapsed:.1f}s')

    # PageIndex 0 vs 1 — does server treat 0 as "first" or as invalid?
    print('\n PageIndex 0 vs 1 vs 999 vs 99999 (last 7 days):')
    for pi in [0, 1, 999, 99999]:
        fields = {
            'PageIndex': pi,
            'PublicationEndDate': _day_end(today),
            'PublicationStartDate': _day_start(today - timedelta(days=7)),
        }
        r, _ = soap_call(session, 'SU_CaNotices', fields)
        p = parse_response(r.text)
        print(f' PageIndex={pi:>5} → status={p["status"]:<14} '
              f'returned PageIndex={p["page_index"]} size={p["size"]/1024:.0f}KB')


def t4_rate_limit(session):
    """Sustained call rate to detect throttling.

    Issues 10 sequential requests with a 0.1 s pause between them and reports
    the request rate that was actually achieved, since the effective rate
    depends on server latency.
    """
    print('\n=== T4: Rate limit probe (10 sequential SU_CaNotices, 1-day window) ===')
    today = datetime.now().date()
    fields = {
        'PageIndex': 1,
        'PublicationEndDate': _day_end(today),
        'PublicationStartDate': _day_start(today - timedelta(days=1)),
    }
    times = []
    statuses = []
    started = time.perf_counter()
    for _ in range(10):
        r, elapsed = soap_call(session, 'SU_CaNotices', fields)
        p = parse_response(r.text)
        times.append(elapsed)
        statuses.append((r.status_code, p['status']))
        time.sleep(0.1)
    wall = time.perf_counter() - started
    rate = len(times) / wall if wall > 0 else 0.0

    print(f' HTTP statuses: {set(statuses)}')
    print(f' Avg response: {sum(times)/len(times):.2f}s '
          f'min: {min(times):.2f}s max: {max(times):.2f}s')
    if all(s[0] == 200 and s[1] == 'Success' for s in statuses):
        print(f' ✓ No throttling at ~{rate:.1f} req/sec sustained')
    else:
        print(' ⚠ Some requests were rejected — check statuses')


def t5_su_contracts_beletage(session):
    """Confirm Beletage's own contracts + sample first contract."""
    print('\n=== T5: SuContracts — Beletage own contracts (last 5 years) ===')
    today = datetime.now().date()
    fields = {
        'ContractEndDate': _day_end(today),
        'ContractStartDate': _day_start(today - timedelta(days=365 * 5)),
        'PageIndex': 1,
    }
    r, elapsed = soap_call(session, 'SuContracts', fields)
    p = parse_response(r.text)
    print(f' Status: {p["status"]}, PageTotal: {p["page_total"]}, '
          f'size: {p["size"]/1024:.0f}KB, {elapsed:.1f}s')
    if p['description']:
        print(f' Description: {p["description"]}')
    # Try to extract first ContractTitle
    # NOTE(review): both patterns are identical and name no element — the tag
    # names appear to have been stripped from this copy; restore before use.
    titles = re.findall(r'([^<]+)', r.text)[:5]
    nos = re.findall(r'([^<]+)', r.text)[:5]
    if nos:
        print(' First contracts:')
        # Pad titles so zip() never drops a contract number that has no title.
        for no, title in zip(nos, titles + [''] * 5):
            print(f' {no}: {title[:60]}')


# ── Main ──

def main():
    """Validate the environment, extract the certs, run T1–T5 and print a summary."""
    for var in ('SEAP_USER', 'SEAP_PASS', 'SEAP_CERT_KEY'):
        if not os.environ.get(var):
            print(f'ERROR: {var} not set — source Infisical first', file=sys.stderr)
            sys.exit(1)
    if not P12_PATH.is_file():
        # Fail with a readable message instead of an opaque openssl error.
        print(f'ERROR: PKCS#12 bundle not found at {P12_PATH}', file=sys.stderr)
        sys.exit(1)

    print('Setting up cert/key in /dev/shm...')
    setup_certs()
    print(f' cert: {CRT_PATH} key: {KEY_PATH}')

    # The context manager closes pooled connections when the run is over.
    with make_session() as session:
        t1_results = t1_supplier_pub_ops(session)
        t2_results = t2_supplier_own_ops(session)
        # Like T1/T2, a network failure in one probe must not prevent the
        # remaining probes and the summary from running.
        for probe in (t3_pagination, t4_rate_limit, t5_su_contracts_beletage):
            try:
                probe(session)
            except requests.RequestException as ex:
                print(f' ERROR in {probe.__name__}: {str(ex)[:120]}')

    print('\n=== Summary ===')
    success_pub = sum(1 for r in t1_results.values() if r.get('status') == 'Success')
    success_own = sum(1 for r in t2_results.values() if r.get('status') == 'Success')
    print(f' Public-data operations (SU_*): {success_pub}/{len(SUPPLIER_PUB_OPS)} return Success')
    print(f' Own-data operations (Su*): {success_own}/{len(SUPPLIER_OWN_OPS)} return Success')


if __name__ == '__main__':
    main()