a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
116 lines
3.7 KiB
Bash
Executable File
116 lines
3.7 KiB
Bash
Executable File
#!/bin/bash
|
||
# MFP Budget Transparency (Transparență Bugetară) scraper — Phase 1: enumerate
# the universe of reporting public entities + fuzzy match name → CUI.
#
# Phase 2 (downloading the XML reports) is not implemented: the MFP application
# requires a CAPTCHA on every search, which calls for an external captcha solver
# (2captcha / anti-captcha) and a budget for ~1.6M requests (4-8K USD for a
# complete 2020-2025 historical ingest). See BUGETAR-PLAN.md for details.
#
# Modes:
#   MODE=enumerate (default) → enumerates (sector × county) → bugetar.entitate
#   MODE=match-cui           → fuzzy match of name → firms.entities.cui_normalized
#   MODE=full                → enumerate + match-cui in a single run
#
# Idempotent. Safe to run repeatedly (UPSERT).
|
||
|
||
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Runtime settings. Every one of them can be overridden from the environment.
MODE="${MODE:-enumerate}"    # enumerate | match-cui | full
JUDET="${JUDET:-}"           # optional filter, forwarded as --judet=...; empty is logged as ALL
SECTOR="${SECTOR:-}"         # optional filter, forwarded as --sector=...; empty is logged as ALL
DELAY_MS="${DELAY_MS:-500}"  # forwarded as --delay-ms=... in enumerate mode only
# File that log() appends to. Overridable like the other settings so the script
# can be pointed at a writable location when /var/log is not available.
LOG="${LOG:-/var/log/vreaudigital-bugetar.log}"
|
||
|
||
# Print a timestamped message to stdout and append the same line to $LOG.
# Arguments: $1 - message text
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG"
}
|
||
|
||
log "=== bugetar scraper started (mode=$MODE judet=${JUDET:-ALL} sector=${SECTOR:-ALL}) ==="

# Fail fast on a mistyped MODE, before any secret is fetched or any dependency
# is installed. Same message and exit status as the dispatch at the end of the
# script.
case "$MODE" in
  enumerate | match-cui | full) ;;
  *)
    log "ERROR: unknown MODE=$MODE (use enumerate|match-cui|full)"
    exit 2
    ;;
esac

# Guard: previous run still going?
# Containers are started as vreaudigital-bugetar-<mode>, so the per-mode names
# must be matched as well as the bare name. The listing is captured first
# instead of being piped into `grep -q`: under `pipefail`, an early grep exit
# could make the whole pipeline look like "not running".
if ! running_containers=$(docker ps --filter name=vreaudigital-bugetar --format '{{.Names}}'); then
  log "ERROR: cannot query docker for running containers"
  exit 1
fi
if grep -Eq '^vreaudigital-bugetar(-(enumerate|match-cui))?$' <<<"$running_containers"; then
  log "WARN: vreaudigital-bugetar already running, skipping"
  exit 0
fi
unset running_containers

# Remove stopped leftovers from an earlier run so `docker run --name` does not
# hit a name conflict. Best effort: the containers usually do not exist.
docker rm -f \
  vreaudigital-bugetar \
  vreaudigital-bugetar-enumerate \
  vreaudigital-bugetar-match-cui >/dev/null 2>&1 || true
|
||
|
||
# ── Fetch DATABASE_URL via Infisical Machine Identity ──
# The sourced file is expected to define the INFISICAL_* variables used below
# (API URL, client id/secret, project id, env, path) — TODO confirm against the
# deployed file. Under `set -u` a missing one aborts the script.
# NOTE(review): the client secret and the access token are passed on the command
# line and may be visible in the process list while the CLI runs.
# shellcheck source=/dev/null
source /opt/vreaudigital/.infisical-mi
TOKEN=$(infisical login --method=universal-auth \
  --domain="$INFISICAL_API_URL" \
  --client-id="$INFISICAL_CLIENT_ID" \
  --client-secret="$INFISICAL_CLIENT_SECRET" \
  --silent --plain)

# Restrict permissions of files created from here on, then create the env file
# that is later handed to `docker run --env-file`.
umask 077
ENVF=$(mktemp /tmp/.vreaudigital-bugetar-env.XXXXXX)
# The file holds a credential: remove it on every exit path, including aborts
# triggered by `set -e`, not only when the script reaches its last lines.
trap 'rm -f -- "$ENVF"' EXIT

DBURL=$(infisical secrets get DATABASE_URL \
  --domain="$INFISICAL_API_URL" \
  --projectId="$INFISICAL_PROJECT_ID" \
  --env="$INFISICAL_ENV" --path="$INFISICAL_PATH" \
  --token="$TOKEN" --plain --silent)
if [[ -z "$DBURL" ]]; then
  log "ERROR: Infisical returned an empty DATABASE_URL"
  exit 1
fi
printf 'DATABASE_URL=%s\n' "$DBURL" > "$ENVF"
# Keep the secrets out of the shell environment for the rest of the run.
unset DBURL TOKEN
|
||
|
||
# All later docker invocations mount the current directory, so switch to the
# scraper service directory first.
cd /opt/vreaudigital/services/seap-scraper

# Make sure node_modules exists: install the dependencies inside a throwaway
# Node container when the tsx package is missing. The install output goes to
# the log file only.
if ! [ -d node_modules/tsx ]; then
  log "Installing seap-scraper deps..."
  docker run --rm \
    --volume "$(pwd):/work" \
    --workdir /work \
    --user "$(id -u):$(id -g)" \
    node:22-alpine \
    npm install --omit=optional 2>&1 \
    | tee -a "$LOG" >/dev/null
fi
|
||
|
||
#######################################
# Run the scraper once, in a detached container, and wait for it to finish.
# Globals:   JUDET, SECTOR, DELAY_MS, ENVF, LOG (all read)
# Arguments: $1 - scraper mode (enumerate | match-cui)
# Outputs:   progress lines via log(); the last 10 lines of the container
#            output go to stdout and are appended to $LOG
# Returns:   the container's exit code; 1 if the container could not be
#            started or its exit code could not be determined
#######################################
run_scraper_mode() {
  local mode="$1"
  local name="vreaudigital-bugetar-$mode"
  # The command line is built as an array so that filter values containing
  # spaces or glob characters reach the scraper as single, literal arguments.
  local -a cmd=(npx tsx src/scrape-bugetar.ts "--mode=$mode")
  local shown="" # same arguments as text, for the log line only
  local cid rc

  if [[ -n "$JUDET" ]]; then
    cmd+=("--judet=$JUDET")
    shown+=" --judet=$JUDET"
  fi
  if [[ -n "$SECTOR" ]]; then
    cmd+=("--sector=$SECTOR")
    shown+=" --sector=$SECTOR"
  fi
  if [[ "$mode" == "enumerate" ]]; then
    cmd+=("--delay-ms=$DELAY_MS")
    shown+=" --delay-ms=$DELAY_MS"
  fi

  log "running mode=$mode args=$shown"

  # This function is called on the left of `||`, where `set -e` is ignored,
  # so a failed `docker run` has to be checked explicitly.
  if ! cid=$(docker run -d \
    --name "$name" \
    --network host \
    --env-file "$ENVF" \
    -v "$(pwd):/work" \
    -w /work \
    --user "$(id -u):$(id -g)" \
    --restart no \
    node:22-alpine \
    "${cmd[@]}"); then
    log "ERROR: docker run failed for mode=$mode"
    docker rm -f "$name" >/dev/null 2>&1 || true
    return 1
  fi
  log "  container: $cid"

  sleep 3 # original note: by now the daemon has read the env file
  docker wait "$name" >/dev/null
  rc=$(docker inspect -f '{{.State.ExitCode}}' "$name" 2>/dev/null) || rc=""
  docker logs "$name" 2>&1 | tail -10 | tee -a "$LOG"
  docker rm -f "$name" >/dev/null 2>&1 || true

  # `return` only accepts a number; report a generic failure when the exit
  # code could not be read.
  [[ "$rc" =~ ^[0-9]+$ ]] || rc=1
  return "$rc"
}
|
||
|
||
# Dispatch on MODE. EXIT_CODE carries the status of the first failing step and
# becomes the exit status of the script.
EXIT_CODE=0
case "$MODE" in
  enumerate | match-cui)
    # Single-step modes: the mode name is passed straight through.
    run_scraper_mode "$MODE" || EXIT_CODE=$?
    ;;
  full)
    # Two steps; the CUI matching runs only when the enumeration succeeded.
    if run_scraper_mode enumerate; then
      run_scraper_mode match-cui || EXIT_CODE=$?
    else
      EXIT_CODE=$?
    fi
    ;;
  *)
    log "ERROR: unknown MODE=$MODE (use enumerate|match-cui|full)"
    EXIT_CODE=2
    ;;
esac

# Remove the env file that holds DATABASE_URL.
rm -f "$ENVF"
log "envfile cleaned"

log "=== bugetar scraper done (exit=$EXIT_CODE) ==="
exit "$EXIT_CODE"
|