#!/bin/bash
# ANAF datornici scraper — runs scrape-anaf-datornici.ts in node:22-alpine.
# Mirrors enrich-anaf.sh / scrape-regas.sh pattern: Infisical Machine Identity
# → env-file → docker run --env-file (NEVER -e $VAR), file deleted post-launch.
#
# Default source: data.gov.ro Q1-2016 snapshot (only public bulk source available;
# anaf.ro/restante/ live is CAPTCHA-blocked — see ANAF-DATORNICI-RECIPES.md).
#
# Idempotent (uses ON CONFLICT (cui, publication_date) DO UPDATE). Safe to run
# from cron, but in practice this is a one-shot until live scraping unlocks.
#
# Environment:
#   SOURCE   value forwarded to the scraper as --source=… (default: datagov2016)
#   DRY_RUN  set to 1 to forward --dry-run to the scraper (default: 0)
#
# Exit status:
#   0      container finished successfully, or a previous run is still active
#   N      exit code of the scraper container
#   1      container exit code could not be determined, or a secret was empty
#   other  any failing step aborts the script (set -e)

set -euo pipefail

SOURCE="${SOURCE:-datagov2016}"
DRY_RUN="${DRY_RUN:-0}"
LOG=/var/log/vreaudigital-anaf-datornici.log

readonly CONTAINER=vreaudigital-anaf-datornici
readonly WORKDIR=/opt/vreaudigital/services/seap-scraper

# Path of the temporary env file; empty while no such file exists.
ENVF=""

# Print a timestamped message to stdout and append it to $LOG.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG"
}

# Remove the env file (it holds DATABASE_URL) on every exit path, including
# aborts triggered by set -e between its creation and the regular removal.
cleanup() {
  if [[ -n "$ENVF" ]]; then
    rm -f -- "$ENVF"
  fi
}
trap cleanup EXIT

log "=== ANAF datornici scrape started (source=$SOURCE dry-run=$DRY_RUN) ==="

# ── Skip this tick if a previous run is still active ──
# The listing is captured first instead of piping into `grep -q`: under
# pipefail an early grep exit could make the pipeline report failure, and a
# docker error would be silently treated as "not running".
running_names=$(docker ps --filter "name=$CONTAINER" --format '{{.Names}}')
if grep -qxF -- "$CONTAINER" <<<"$running_names"; then
  log "WARN: vreaudigital-anaf-datornici already running, skipping this tick"
  exit 0
fi

# Best effort: drop a stopped container left over from an earlier run so the
# name is free again; a missing container is not an error.
docker rm -f "$CONTAINER" 2>/dev/null || true

# ── Make sure the scraper dependencies are installed ──
# Done before any secret is fetched so the secret never sits on disk while
# npm runs.
cd "$WORKDIR"
if [[ ! -d node_modules/tsx ]]; then
  log "Installing seap-scraper deps..."
  docker run --rm -v "$PWD:/work" -w /work --user "$(id -u):$(id -g)" \
    node:22-alpine npm install --omit=optional 2>&1 \
    | tee -a "$LOG" >/dev/null
fi

# ── Fetch DATABASE_URL via Infisical Machine Identity ──
# The sourced file is expected to define the INFISICAL_* variables used below.
# NOTE(review): client secret and token are passed on argv and are therefore
# visible in the process list while the CLI runs — consider the CLI's
# environment-variable alternatives; confirm against the installed version.
# shellcheck source=/dev/null
source /opt/vreaudigital/.infisical-mi

TOKEN=$(infisical login --method=universal-auth \
  --domain="$INFISICAL_API_URL" \
  --client-id="$INFISICAL_CLIENT_ID" \
  --client-secret="$INFISICAL_CLIENT_SECRET" \
  --silent --plain)
if [[ -z "$TOKEN" ]]; then
  log "ERROR: Infisical login returned an empty token"
  exit 1
fi

umask 077
ENVF=$(mktemp /tmp/.vreaudigital-env.XXXXXX)

DBURL=$(infisical secrets get DATABASE_URL \
  --domain="$INFISICAL_API_URL" \
  --projectId="$INFISICAL_PROJECT_ID" \
  --env="$INFISICAL_ENV" --path="$INFISICAL_PATH" \
  --token="$TOKEN" --plain --silent)
if [[ -z "$DBURL" ]]; then
  log "ERROR: Infisical returned an empty DATABASE_URL"
  exit 1
fi

printf 'DATABASE_URL=%s\n' "$DBURL" > "$ENVF"
unset DBURL TOKEN

# ── Launch detached docker container ──
# Arguments are collected in an array so the optional flag needs no unquoted
# expansion.
run_args=(
  -d
  --name "$CONTAINER"
  --network host
  --env-file "$ENVF"
  -v "$PWD:/work"
  -w /work
  --user "$(id -u):$(id -g)"
  --restart no
  node:22-alpine
  npx tsx src/scrape-anaf-datornici.ts
  "--source=$SOURCE"
)
if [[ "$DRY_RUN" == "1" ]]; then
  run_args+=(--dry-run)
fi

CID=$(docker run "${run_args[@]}")
log "container started: $CID"

# Grace period kept from the original before the env file is removed.
# NOTE(review): the docker CLI appears to read --env-file before `run -d`
# returns, so this delay is probably unnecessary — confirm before dropping it.
sleep 3
rm -f -- "$ENVF"
ENVF=""
log "envfile cleaned"

# ── Wait for completion and report ──
docker wait "$CONTAINER" >/dev/null
EXIT_CODE=$(docker inspect -f '{{.State.ExitCode}}' "$CONTAINER" 2>/dev/null || echo "?")
docker logs "$CONTAINER" 2>&1 | tail -15 | tee -a "$LOG"
log "=== ANAF datornici scrape done (exit=$EXIT_CODE) ==="

# `exit` only accepts a number; when the exit code could not be read ("?")
# report a plain failure instead of tripping over a non-numeric argument.
if [[ "$EXIT_CODE" =~ ^[0-9]+$ ]]; then
  exit "$EXIT_CODE"
fi
exit 1