Files
vreau-digital/services/seap-scraper/cron/scrape-asf.sh
T
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

87 lines
2.8 KiB
Bash
Executable File

#!/bin/bash
# ASF — Autoritatea de Supraveghere Financiară.
# Scrapes the public registry of authorized financial entities (insurers,
# brokers, etc.) from data.asfromania.ro/scr/ra. ~860 entities.
#
# Mirrors scrape-anre.sh pattern: Infisical Machine Identity → env-file →
# docker run --env-file (NEVER -e $VAR), file deleted post-launch.
#
# Idempotent (UPSERT on UNIQUE(register_type, register_no)).
# Safe to run from cron.
#
# Env knobs:
# LIMIT=0 (default: 0 = full)
# NO_GAPFILL=0 (default: 0 = run gapfill; set 1 to skip)
#
# Run:
# sudo LIMIT=20 /opt/vreaudigital/services/seap-scraper/cron/scrape-asf.sh # smoke
# sudo /opt/vreaudigital/services/seap-scraper/cron/scrape-asf.sh # full
# Strict mode: abort on a failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Caller-tunable knobs; each falls back to 0 when unset or empty.
: "${LIMIT:=0}"
: "${NO_GAPFILL:=0}"

# File that receives this script's log lines.
LOG='/var/log/vreaudigital-asf.log'
# log MESSAGE
# Prefixes MESSAGE (only $1; further arguments are ignored) with a
# "[YYYY-MM-DD HH:MM:SS]" local timestamp, prints the line on stdout and
# appends the same line to the file named by $LOG.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG"
}
log "=== ASF scrape started (limit=$LIMIT no_gapfill=$NO_GAPFILL) ==="

# ── Overlap guard ──
# Capture the `docker ps` output before matching it. Piping it straight into
# `grep -q` under `set -o pipefail` is racy: grep exits on the first match,
# docker can then die from SIGPIPE, the pipeline reports failure, and a
# running scrape would be taken for absent and force-removed below.
if ! RUNNING_NAMES=$(docker ps --filter name=vreaudigital-asf --format '{{.Names}}'); then
  # Without a container list we cannot know whether a scrape is in flight,
  # so refuse to continue rather than risk removing it.
  log "ERROR: docker ps failed, cannot tell whether vreaudigital-asf is running"
  exit 1
fi

# Docker's name filter also matches partial names; require an exact line.
if grep -qxF -- 'vreaudigital-asf' <<<"$RUNNING_NAMES"; then
  log "WARN: vreaudigital-asf already running, skipping this tick"
  exit 0
fi

# Remove the stopped container left behind by a previous run. Errors are
# ignored on purpose: there is usually nothing to remove.
docker rm -f vreaudigital-asf 2>/dev/null || true
# ── Fetch DATABASE_URL via Infisical Machine Identity ──
# The sourced file is expected to define the INFISICAL_* variables used
# below; under `set -u` a missing one aborts the script.
# shellcheck source=/dev/null
source /opt/vreaudigital/.infisical-mi

# Exchange the machine-identity credentials for an access token.
# NOTE(review): the client secret and the token are passed on argv and are
# therefore visible in `ps` while the CLI runs — consider the CLI's
# environment-variable inputs if that matters on this host.
TOKEN=$(infisical login --method=universal-auth \
  --domain="$INFISICAL_API_URL" \
  --client-id="$INFISICAL_CLIENT_ID" \
  --client-secret="$INFISICAL_CLIENT_SECRET" \
  --silent --plain)

# The env file holds a secret: create it owner-only and make sure it is
# removed on EVERY exit path (a later `set -e` abort included), not only
# after a successful container launch.
umask 077
ENVF=$(mktemp /tmp/.vreaudigital-asf-env.XXXXXX)
cleanup_envfile() { rm -f -- "$ENVF"; }
trap cleanup_envfile EXIT

DBURL=$(infisical secrets get DATABASE_URL \
  --domain="$INFISICAL_API_URL" \
  --projectId="$INFISICAL_PROJECT_ID" \
  --env="$INFISICAL_ENV" --path="$INFISICAL_PATH" \
  --token="$TOKEN" --plain --silent)

# Fail fast instead of launching the scraper with an empty connection string.
if [[ -z "$DBURL" ]]; then
  log "ERROR: Infisical returned an empty DATABASE_URL"
  exit 1
fi

printf 'DATABASE_URL=%s\n' "$DBURL" > "$ENVF"

# Drop the in-memory copies as soon as the env file is written.
unset DBURL TOKEN
# Work from the scraper checkout; the docker bind mounts use this directory.
cd /opt/vreaudigital/services/seap-scraper

# If the tsx package directory is missing, install dependencies inside a
# throwaway Node container running with the invoking user's uid:gid.
# npm's output goes to the log file only, not to stdout.
if [[ ! -d node_modules/tsx ]]; then
  log "Installing seap-scraper deps..."
  docker run --rm \
    --volume "$PWD:/work" \
    --workdir /work \
    --user "$(id -u):$(id -g)" \
    node:22-alpine npm install --omit=optional 2>&1 \
    | tee -a "$LOG" >/dev/null
fi
# ── Launch the scraper container ──
# Optional CLI flags are collected in an array so that each flag reaches
# docker as exactly one word, without relying on unquoted word-splitting.
EXTRA_ARGS=()

# LIMIT=0 means "full scrape" (no --limit flag). A value that is not a
# non-negative integer also results in a full scrape, but is now reported
# instead of being silently discarded.
if [[ "$LIMIT" =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so a value such as 008 is not parsed as octal.
  if (( 10#$LIMIT > 0 )); then
    EXTRA_ARGS+=("--limit=$LIMIT")
  fi
else
  log "WARN: LIMIT='$LIMIT' is not a non-negative integer, running a full scrape"
fi

if [[ "$NO_GAPFILL" == "1" ]]; then
  EXTRA_ARGS+=(--no-gapfill)
fi

# Detached run; the secret is handed over via --env-file, never via -e.
# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without
# tripping `set -u` on bash versions older than 4.4.
CID=$(docker run -d \
  --name vreaudigital-asf \
  --network host \
  --env-file "$ENVF" \
  -v "$(pwd):/work" \
  -w /work \
  --user "$(id -u):$(id -g)" \
  --restart no \
  node:22-alpine \
  npx tsx src/scrape-asf.ts ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"})
log "container started: $CID"
# Short grace period after the launch, then remove the secrets file.
sleep 3
rm -f -- "$ENVF"
log "envfile cleaned"

# Block until the scraper container stops, then read its exit status.
# "?" marks the case where the status could not be read.
docker wait vreaudigital-asf >/dev/null
EXIT_CODE=$(docker inspect -f '{{.State.ExitCode}}' vreaudigital-asf 2>/dev/null || echo "?")

# Copy the last 40 lines of container output to stdout and the log.
# Best-effort: a failure here must not abort the script (set -e/pipefail)
# before the scrape's own exit status is logged and returned.
docker logs vreaudigital-asf 2>&1 | tail -40 | tee -a "$LOG" \
  || log "WARN: could not copy container logs"
log "=== ASF scrape done (exit=$EXIT_CODE) ==="

# `exit` needs a numeric argument; report an unreadable status as a plain
# failure instead of passing "?" to the builtin.
if [[ ! "$EXIT_CODE" =~ ^[0-9]+$ ]]; then
  exit 1
fi
exit "$EXIT_CODE"