Files
Claude VM a6c03a091e initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix).
- 22 pages migrated, 127 files total
- All internal links: /achizitii/X → /X (176 occurrences fixed)
- AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub
- BaseLayout new (vreau.digital branding, OG tags, site URL)
- astro.config.mjs: site https://vreau.digital, server output (was static)
- docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital
- deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log

Backend shared with gov-agreg:
- PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...)
- Photon, Martin tiles
- Infisical /vreaudigital path (DATABASE_URL etc. shared)

build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00

116 lines
3.7 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Scraper Transparență Bugetară MFP — Faza 1: enumerare universul entităților
# publice raportoare + fuzzy match nume → CUI.
#
# Faza 2 (descărcare rapoarte XML) nu e implementată: aplicația MFP cere
# CAPTCHA pe fiecare căutare, ceea ce necesită captcha solver extern (2captcha
# / anti-captcha) și un buget pentru ~1.6M cereri (4-8K USD pentru ingest
# istoric complet 2020-2025). Vezi BUGETAR-PLAN.md pentru detalii.
#
# Modes:
# MODE=enumerate (default) → enumeră (sector × județ) → bugetar.entitate
# MODE=match-cui → fuzzy match denumire → firms.entities.cui_normalized
# MODE=full → enumerate + match-cui într-o singură rulare
#
# Idempotent. Sigur de rulat repetat (UPSERT).
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Runtime knobs; each one can be overridden from the environment.
: "${MODE:=enumerate}"  # which scraper mode(s) to run
: "${JUDET:=}"          # empty → no --judet argument is passed to the scraper
: "${SECTOR:=}"         # empty → no --sector argument is passed to the scraper
: "${DELAY_MS:=500}"    # forwarded as --delay-ms in enumerate mode
LOG=/var/log/vreaudigital-bugetar.log
# Print a timestamped message on stdout and append the same line to $LOG.
# Arguments: $1 - message text
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG"
}
log "=== bugetar scraper started (mode=$MODE judet=${JUDET:-ALL} sector=${SECTOR:-ALL}) ==="

# Guard: previous run still going?
# The scraper containers are created as "vreaudigital-bugetar-<mode>", so the
# check has to accept the per-mode suffix (the bare name is kept for
# compatibility). docker's name filter is a substring match, hence the
# anchored grep. The listing is captured first so that grep -q exiting early
# cannot turn into a pipefail false negative.
running_containers=$(docker ps --filter name=vreaudigital-bugetar --format '{{.Names}}') \
  || running_containers=""
if grep -Eq '^vreaudigital-bugetar(-(enumerate|match-cui))?$' <<<"$running_containers"; then
  log "WARN: vreaudigital-bugetar already running, skipping"
  exit 0
fi

# Nothing is running: drop leftover (stopped) containers from an interrupted
# run so the fixed --name values can be reused. Best-effort by design.
for stale_name in vreaudigital-bugetar vreaudigital-bugetar-enumerate vreaudigital-bugetar-match-cui; do
  docker rm -f "$stale_name" >/dev/null 2>&1 || true
done
# ── Fetch DATABASE_URL via Infisical Machine Identity ──
# The sourced file is expected to define the INFISICAL_* variables used below
# (set -u aborts the script if one of them is missing).
# shellcheck source=/dev/null
source /opt/vreaudigital/.infisical-mi
TOKEN=$(infisical login --method=universal-auth \
  --domain="$INFISICAL_API_URL" \
  --client-id="$INFISICAL_CLIENT_ID" \
  --client-secret="$INFISICAL_CLIENT_SECRET" \
  --silent --plain)

# The env file will hold the database credentials: create it owner-only and
# make sure it is removed on every exit path (including set -e aborts), not
# just at the end of a successful run.
umask 077
ENVF=$(mktemp /tmp/.vreaudigital-bugetar-env.XXXXXX)
trap 'rm -f -- "$ENVF"' EXIT

# NOTE(review): the token and client secret are passed on the command line and
# are therefore visible in the process list while infisical runs — consider
# the CLI's environment-variable alternatives.
DBURL=$(infisical secrets get DATABASE_URL \
  --domain="$INFISICAL_API_URL" \
  --projectId="$INFISICAL_PROJECT_ID" \
  --env="$INFISICAL_ENV" --path="$INFISICAL_PATH" \
  --token="$TOKEN" --plain --silent)
if [[ -z "$DBURL" ]]; then
  log "ERROR: Infisical returned an empty DATABASE_URL"
  exit 1
fi

# printf instead of echo: the value is arbitrary data and must be written
# verbatim.
printf 'DATABASE_URL=%s\n' "$DBURL" > "$ENVF"
unset DBURL TOKEN
cd /opt/vreaudigital/services/seap-scraper

# Install the scraper's npm dependencies on first use. The presence of
# node_modules/tsx is the marker for "already installed". npm runs inside a
# throwaway node container as the calling user; its output goes to the log
# file only.
if [[ ! -d node_modules/tsx ]]; then
  log "Installing seap-scraper deps..."
  docker run --rm \
    --volume "$PWD:/work" \
    --workdir /work \
    --user "$(id -u):$(id -g)" \
    node:22-alpine \
    npm install --omit=optional 2>&1 \
    | tee -a "$LOG" >/dev/null
fi
#######################################
# Run one scraper mode in a detached container and wait for it to finish.
# Globals:   JUDET, SECTOR, DELAY_MS, ENVF, LOG (read)
# Arguments: $1 - mode forwarded to scrape-bugetar.ts as --mode=<mode>
# Outputs:   the last 10 lines of the container log, to stdout and $LOG
# Returns:   the container's exit code; 1 when the container could not be
#            started or its exit code could not be read
#######################################
run_scraper_mode() {
  local mode="$1"
  local name="vreaudigital-bugetar-$mode"
  local cid rc
  # An array keeps every option a single argument even when the value contains
  # spaces (e.g. JUDET="Satu Mare").
  local -a extra_args=()

  if [[ -n "$JUDET" ]]; then
    extra_args+=("--judet=$JUDET")
  fi
  if [[ -n "$SECTOR" ]]; then
    extra_args+=("--sector=$SECTOR")
  fi
  if [[ "$mode" == "enumerate" ]]; then
    extra_args+=("--delay-ms=$DELAY_MS")
  fi
  log "running mode=$mode args=${extra_args[*]+ ${extra_args[*]}}"

  # Callers invoke this function on the left of `||`, which disables set -e
  # inside it, so a failed start has to be detected explicitly. The container
  # is not removed on this path: on a name conflict it belongs to another run.
  if ! cid=$(docker run -d \
    --name "$name" \
    --network host \
    --env-file "$ENVF" \
    -v "$(pwd):/work" \
    -w /work \
    --user "$(id -u):$(id -g)" \
    --restart no \
    node:22-alpine \
    npx tsx src/scrape-bugetar.ts --mode="$mode" \
    ${extra_args[@]+"${extra_args[@]}"}); then
    log "ERROR: failed to start container $name"
    return 1
  fi
  log " container: $cid"

  sleep 3 # grace period so the env file has been read (kept from the original)
  docker wait "$name" >/dev/null
  rc=$(docker inspect -f '{{.State.ExitCode}}' "$name" 2>/dev/null) || rc=""
  docker logs "$name" 2>&1 | tail -10 | tee -a "$LOG"
  docker rm -f "$name" >/dev/null 2>&1 || true

  # `return` needs a number; treat an unreadable exit code as a failure.
  if [[ ! "$rc" =~ ^[0-9]+$ ]]; then
    log "ERROR: could not determine exit code of $name"
    return 1
  fi
  return "$rc"
}
# Dispatch on MODE and remember the first failing exit status.
EXIT_CODE=0
case "$MODE" in
  enumerate | match-cui)
    run_scraper_mode "$MODE" || EXIT_CODE=$?
    ;;
  full)
    # match-cui only runs when enumerate succeeded.
    if run_scraper_mode enumerate; then
      run_scraper_mode match-cui || EXIT_CODE=$?
    else
      EXIT_CODE=$?
    fi
    ;;
  *)
    log "ERROR: unknown MODE=$MODE (use enumerate|match-cui|full)"
    EXIT_CODE=2
    ;;
esac

# The env file holds DATABASE_URL; remove it before reporting the result.
rm -f "$ENVF"
log "envfile cleaned"
log "=== bugetar scraper done (exit=$EXIT_CODE) ==="
exit "$EXIT_CODE"