Files
vreau-digital/services/seap-scraper/wsp/runner.py
T
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

143 lines
4.7 KiB
Python

"""
WSP CLI runner.
Usage:
python -m wsp.runner status
python -m wsp.runner incremental [op_name ...|all] [--lookback-hours 36]
python -m wsp.runner backfill <op_name> --start YYYY-MM-DD --end YYYY-MM-DD [--workers 3]
python -m wsp.runner test
"""
from __future__ import annotations
import argparse
import logging
import sys
from datetime import datetime, timedelta
from . import db, sync
from .operations import ALL_OPS, PUBLIC_OPS, OWN_OPS
# Configure the root logger at import time: INFO level, one timestamped line
# per record, tagged with the level and the emitting logger's name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s [%(name)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
# Module-level logger shared by the sub-command handlers in this file.
log = logging.getLogger('wsp.runner')
# Per-feed sync bookkeeping, one row per feed.
_SYNC_STATE_SQL = """
SELECT feed, last_run_at, last_cursor_date,
items_imported_total, items_imported_24h,
consecutive_errors, last_error
FROM seap.wsp_sync_state ORDER BY feed
"""

# Backfill windows counted per (feed, state).
_BACKFILL_QUEUE_SQL = """
SELECT feed, state, count(*) FROM seap.wsp_backfill_windows
GROUP BY feed, state ORDER BY feed, state
"""

# Imported announcements counted per WSP source.
_SOURCE_COUNTS_SQL = """
SELECT source, count(*) FROM seap.announcements
WHERE source LIKE 'wsp_%' GROUP BY source ORDER BY source
"""


def cmd_status(args):
    """Print a plain-text status report to stdout.

    Three sections are printed, each backed by one query:

    1. one line per feed from ``seap.wsp_sync_state`` (always printed,
       including the header when there are no rows);
    2. backfill window counts per feed/state (only when rows exist);
    3. row counts in ``seap.announcements`` per ``wsp_*`` source (only when
       rows exist).

    ``args`` is accepted for handler-signature uniformity and is not read.
    """
    with db.connection() as conn:
        cur = conn.cursor()

        # --- Section 1: sync state per feed -------------------------------
        cur.execute(_SYNC_STATE_SQL)
        state_rows = cur.fetchall()
        print(f'\n{"Feed":<25} {"Last run":<22} {"Cursor":<22} {"Total":>10} {"Errors"}')
        print('-' * 100)
        for (feed, last_run, cursor_date, total,
             _imported_24h, error_count, last_error) in state_rows:
            # Keep the line readable: show at most 40 chars of the last error.
            if last_error and len(last_error) > 40:
                shown_error = last_error[:40] + '...'
            else:
                shown_error = last_error or ''
            # Timestamps are cut to 19 characters of their str() form.
            print(f'{feed:<25} {str(last_run)[:19]:<22} {str(cursor_date)[:19]:<22} '
                  f'{total or 0:>10,} {error_count}/{shown_error}')

        # --- Section 2: backfill queue ------------------------------------
        cur.execute(_BACKFILL_QUEUE_SQL)
        queue_rows = cur.fetchall()
        if queue_rows:
            print(f'\n{"Backfill queue":<30}')
            print('-' * 60)
            for feed, state, count in queue_rows:
                print(f' {feed:<25} {state:<15} {count:>8}')

        # --- Section 3: imported rows per source --------------------------
        cur.execute(_SOURCE_COUNTS_SQL)
        source_rows = cur.fetchall()
        if source_rows:
            print(f'\n{"Source":<25} {"Rows in seap.announcements"}')
            print('-' * 60)
            for source, count in source_rows:
                print(f' {source:<25} {count:>10,}')
def cmd_incremental(args):
    """Run an incremental sync for each requested operation.

    Reads ``args.ops`` (list of operation names) and ``args.lookback_hours``
    (forwarded to ``sync.run_incremental``).

    The literal name ``all`` selects every operation in ``ALL_OPS``. It is
    honoured wherever it appears in the list, so ``incremental SU_X all``
    runs everything instead of reporting ``all`` as an unknown operation.

    Unknown names are logged and skipped so the remaining operations still
    run. If any name was unknown the process exits with status 1 once the
    loop has finished, matching how the backfill command reports an unknown
    operation, so a typo in a scheduled job does not pass silently.
    """
    if 'all' in args.ops:
        target_ops = list(ALL_OPS)
    else:
        target_ops = args.ops

    unknown_ops = []
    for op_name in target_ops:
        if op_name not in ALL_OPS:
            log.error('Unknown op: %s', op_name)
            unknown_ops.append(op_name)
            continue
        log.info('=== Running incremental: %s ===', op_name)
        result = sync.run_incremental(op_name, lookback_hours=args.lookback_hours)
        log.info('Result: %s', result)

    if unknown_ops:
        # Valid operations have already run; only the exit status changes.
        sys.exit(1)
def cmd_backfill(args):
    """Backfill a single operation over the range given by --start/--end.

    Reads ``args.op``, ``args.start``, ``args.end`` (ISO-8601 strings parsed
    with ``datetime.fromisoformat``) and ``args.workers``. The range is
    logged in half-open notation ``[start, end)``.

    The process exits with status 1, after logging the reason, when:

    * ``args.op`` is not in ``ALL_OPS``;
    * either bound is not a valid ISO date/datetime;
    * one bound carries a UTC offset and the other does not (such values
      cannot be compared);
    * ``--end`` is earlier than ``--start``.
    """
    if args.op not in ALL_OPS:
        log.error('Unknown op: %s. Available: %s', args.op, list(ALL_OPS))
        sys.exit(1)

    # Report malformed dates as a CLI error instead of a raw traceback.
    try:
        start = datetime.fromisoformat(args.start)
        end = datetime.fromisoformat(args.end)
    except ValueError as exc:
        log.error('Invalid --start/--end (expected YYYY-MM-DD or full ISO): %s', exc)
        sys.exit(1)

    # Comparing a naive datetime with an aware one raises TypeError, so
    # reject the mix explicitly before ordering the bounds.
    if (start.tzinfo is None) != (end.tzinfo is None):
        log.error('--start and --end must both include a UTC offset or both omit it')
        sys.exit(1)

    if end < start:
        log.error('Inverted backfill range: --end (%s) is before --start (%s)', end, start)
        sys.exit(1)

    log.info('=== Backfill %s [%s, %s) workers=%d ===',
             args.op, start, end, args.workers)
    result = sync.run_backfill(args.op, start, end, workers=args.workers)
    log.info('Done: %s', result)
def cmd_test(args):
    """Smoke-test the pipeline by backfilling yesterday for each SU_* feed.

    For every feed in the fixed list below, ``sync.run_backfill`` is called
    with a single worker and ``enqueue=True`` over yesterday's local calendar
    day, and the ``items_imported`` entry of its result is logged. A failure
    in one feed is logged with its traceback and does not stop the others.

    ``args`` is accepted for handler-signature uniformity and is not read.
    """
    smoke_feeds = (
        'SU_CaNotices', 'SU_CNotices', 'SU_PiNotices',
        'SU_RfqNotices', 'SU_RfqInvitations',
        'SU_DCNotices', 'SU_PCNotices', 'SU_RdcNotices', 'SU_ENotices',
    )

    today = datetime.now().date()
    yesterday = today - timedelta(days=1)
    day_start = datetime.combine(yesterday, datetime.min.time())
    # NOTE(review): this bound is 23:59:59.999999 of yesterday, while the
    # backfill command logs its range as half-open [start, end) — confirm
    # whether the last microsecond of the day is meant to be excluded.
    day_end = datetime.combine(yesterday, datetime.max.time())

    log.info('=== Smoke test on %s ===', yesterday)
    for op_name in smoke_feeds:
        log.info('--- %s ---', op_name)
        try:
            outcome = sync.run_backfill(op_name, day_start, day_end,
                                        workers=1, enqueue=True)
            log.info('%d items imported', outcome['items_imported'])
        except Exception as e:
            # Best-effort: record the failure and move on to the next feed.
            log.exception(' → ERROR: %s', e)
def _build_parser():
    """Create the ``wsp.runner`` argument parser with its four sub-commands."""
    parser = argparse.ArgumentParser(prog='wsp.runner')
    subcommands = parser.add_subparsers(dest='cmd', required=True)

    subcommands.add_parser('status')

    incremental = subcommands.add_parser('incremental')
    incremental.add_argument('ops', nargs='*', default=['all'],
                             help='Operations to sync (default: all)')
    incremental.add_argument('--lookback-hours', type=int, default=36)

    backfill = subcommands.add_parser('backfill')
    backfill.add_argument('op')
    backfill.add_argument('--start', required=True, help='YYYY-MM-DD or full ISO')
    backfill.add_argument('--end', required=True)
    backfill.add_argument('--workers', type=int, default=3)

    subcommands.add_parser('test')
    return parser


def main():
    """Parse ``sys.argv`` and run the handler for the chosen sub-command.

    A sub-command is mandatory; argparse exits with status 2 on a missing or
    unknown sub-command or on missing required options.
    """
    args = _build_parser().parse_args()
    # Sub-command name (stored by argparse in ``args.cmd``) -> handler.
    dispatch = {
        'status': cmd_status,
        'incremental': cmd_incremental,
        'backfill': cmd_backfill,
        'test': cmd_test,
    }
    handler = dispatch[args.cmd]
    handler(args)
# Run the CLI only when this module is executed as a script
# (e.g. ``python -m wsp.runner``), not when it is imported.
if __name__ == '__main__':
    main()