initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
+125
@@ -0,0 +1,125 @@
|
||||
#!/bin/bash
# ANAF datornici — LIVE scraper wrapper (Cloudflare Turnstile via 2captcha).
#
# Mirrors scrape-cnsc.sh / scrape-anaf-datornici.sh pattern but runs a Python
# script (not TSX) because the live scraper uses requests + psycopg2 and shares
# nothing with the data.gov.ro one-shot TS importer.
#
# Infisical Machine Identity → env-file (DATABASE_URL + TWOCAPTCHA_KEY) →
# docker run --env-file (NEVER -e $VAR), file deleted post-launch.
#
# Idempotent (UPSERT on cui+publication_date). Designed to be triggered
# quarterly by vreaudigital-anaf-datornici.timer.
#
# ⚠️ COST: each run spends real money via 2captcha (~$0.50-3 per quarterly
# tick, ~$60-100 one-time for 10-year backfill). Do NOT enable the systemd
# timer until TWOCAPTCHA_KEY is funded — see HANDOFF-anaf-datornici-2captcha.md.
#
# Env knobs:
#   DRY_RUN=1 — parse-only, zero spend, zero DB writes.
#   BACKFILL_FROM=2016-Q1 — iterate from quarter X through current.
#   CATEGORIES=mari,mijlocii — subset of {mari,mijlocii,mici,institutii_publice,persoane_fizice}.
#   INCLUDE_LISTA_ALBA=1 — also scrape anaf.lista_alba (separate endpoint).
#
# Run:
#   sudo /opt/vreaudigital/services/seap-scraper/cron/scrape-anaf-datornici-live.sh
#   sudo DRY_RUN=1 /opt/vreaudigital/services/seap-scraper/cron/scrape-anaf-datornici-live.sh
#   sudo BACKFILL_FROM=2016-Q1 INCLUDE_LISTA_ALBA=1 /opt/.../scrape-anaf-datornici-live.sh
|
||||
|
||||
# Strict mode: abort on a failing command, an unset variable, or a failing
# pipeline stage.
set -euo pipefail

# Operator-tunable knobs. Each keeps the caller's value when it is set and
# non-empty; otherwise it falls back to the default shown here.
DRY_RUN="${DRY_RUN:-0}"
BACKFILL_FROM="${BACKFILL_FROM:-}"
CATEGORIES="${CATEGORIES:-}"
INCLUDE_LISTA_ALBA="${INCLUDE_LISTA_ALBA:-0}"

# Host-side log file; this script appends its own messages and the tail of the
# container output to it.
LOG=/var/log/vreaudigital-anaf-datornici.log
|
||||
|
||||
# log MESSAGE
# Prefix MESSAGE with a "[YYYY-MM-DD HH:MM:SS]" timestamp, print it on stdout
# and append the same line to the file named by $LOG.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG"
}
|
||||
|
||||
log "=== ANAF datornici LIVE scrape started (dry_run=$DRY_RUN backfill=$BACKFILL_FROM lista_alba=$INCLUDE_LISTA_ALBA) ==="

# Overlap guard: if a container with this exact name is still running, skip
# this tick instead of starting a second run.
#
# The `docker ps` output is captured first and matched afterwards. Piping it
# straight into `grep -q` under `pipefail` can report failure when grep exits
# early and the producer receives SIGPIPE; that would read as "not running"
# and the `docker rm -f` below would then kill the live run.
# The name filter is a substring match, hence the exact whole-line comparison.
# If `docker ps` itself fails the guard is treated as "not running", which is
# how the piped form behaved as well.
running_names=$(docker ps --filter name=vreaudigital-anaf-datornici-live --format '{{.Names}}') \
  || running_names=""
if grep -qxF 'vreaudigital-anaf-datornici-live' <<<"$running_names"; then
  log "WARN: vreaudigital-anaf-datornici-live already running, skipping this tick"
  exit 0
fi

# Remove a leftover (stopped) container of the same name so `docker run --name`
# cannot collide; a missing container is not an error.
docker rm -f vreaudigital-anaf-datornici-live 2>/dev/null || true
|
||||
|
||||
# ── Fetch DATABASE_URL + TWOCAPTCHA_KEY via Infisical Machine Identity ──
# The sourced file presumably defines the INFISICAL_* variables used below and
# later in the script (API URL, client id/secret, project id, env, path) —
# confirm against the file on the host. Under `set -u` a missing one aborts.
# shellcheck source=/dev/null
source /opt/vreaudigital/.infisical-mi

# Exchange the machine-identity credentials for a short-lived access token.
# NOTE(review): the client secret is passed on argv and may be visible in the
# process list while the CLI runs; check whether the installed Infisical CLI
# accepts it via an environment variable instead.
TOKEN=$(infisical login --method=universal-auth \
  --domain="$INFISICAL_API_URL" \
  --client-id="$INFISICAL_CLIENT_ID" \
  --client-secret="$INFISICAL_CLIENT_SECRET" \
  --silent --plain)
|
||||
|
||||
# Secrets are staged in a private temp file that is handed to
# `docker run --env-file`. umask 077 keeps everything created from here on
# owner-only.
umask 077
ENVF=$(mktemp /tmp/.vreaudigital-anaf-datornici-live-env.XXXXXX)

# Guarantee the secrets file is removed on every exit path. Without this, any
# command that aborts the script under `set -e` before the explicit cleanup
# would leave DATABASE_URL / TWOCAPTCHA_KEY behind in /tmp.
trap 'rm -f -- "${ENVF:-}"' EXIT

DBURL=$(infisical secrets get DATABASE_URL \
  --domain="$INFISICAL_API_URL" \
  --projectId="$INFISICAL_PROJECT_ID" \
  --env="$INFISICAL_ENV" --path="$INFISICAL_PATH" \
  --token="$TOKEN" --plain --silent)
# printf rather than echo: the value is arbitrary data and must be written
# verbatim. This first write truncates/creates the file content.
printf 'DATABASE_URL=%s\n' "$DBURL" > "$ENVF"
# Drop the secret from the shell's environment as soon as it is on disk.
unset DBURL
|
||||
|
||||
# TWOCAPTCHA_KEY: required unless DRY_RUN=1. If missing, abort with a clear
# pointer to the handoff doc — DO NOT silently run (would still hit ANAF page).
if [ "$DRY_RUN" != "1" ]; then
  # Try primary path first ($INFISICAL_PATH = /vreaudigital), fall back to root.
  # Some users add TWOCAPTCHA_KEY at root path / (less project-namespaced).
  for try_path in "$INFISICAL_PATH" "/"; do
    # Lookup failures are deliberately swallowed here (stderr dropped, status
    # ignored) so the next path can be tried; emptiness is checked below.
    TWOCAPTCHA_KEY=$(infisical secrets get TWOCAPTCHA_KEY \
      --domain="$INFISICAL_API_URL" \
      --projectId="$INFISICAL_PROJECT_ID" \
      --env="$INFISICAL_ENV" --path="$try_path" \
      --token="$TOKEN" --plain --silent 2>/dev/null || true)
    if [ -n "${TWOCAPTCHA_KEY:-}" ]; then
      break
    fi
  done

  if [ -z "${TWOCAPTCHA_KEY:-}" ]; then
    log "ERROR: TWOCAPTCHA_KEY missing in Infisical (checked $INFISICAL_PATH + /) — see HANDOFF-anaf-datornici-2captcha.md"
    log " Add via: NEW SECRET PROTOCOL (Infisical, either path /vreaudigital or /)"
    # The env-file already holds DATABASE_URL; remove it before bailing out.
    rm -f -- "$ENVF"
    exit 3
  fi

  printf 'TWOCAPTCHA_KEY=%s\n' "$TWOCAPTCHA_KEY" >> "$ENVF"
  unset TWOCAPTCHA_KEY
fi
# The access token is not needed past this point.
unset TOKEN
|
||||
|
||||
# Pass-through env knobs
#######################################
# Append the non-secret run knobs to an env-file.
# Globals:   DRY_RUN, BACKFILL_FROM, CATEGORIES, INCLUDE_LISTA_ALBA (read)
# Arguments: $1 - path of the env-file to append to
# Outputs:   appends KEY=VALUE lines to $1:
#              DRY_RUN               always
#              BACKFILL_FROM         only when non-empty
#              CATEGORIES            only when non-empty
#              INCLUDE_LISTA_ALBA=1  only when the knob equals "1"
#              ANAF_DATORNICI_LOG    always (fixed path under /work)
# Returns:   non-zero if the file cannot be opened for appending
#######################################
append_env_knobs() {
  local envfile=$1
  {
    printf 'DRY_RUN=%s\n' "$DRY_RUN"
    if [ -n "$BACKFILL_FROM" ]; then
      printf 'BACKFILL_FROM=%s\n' "$BACKFILL_FROM"
    fi
    if [ -n "$CATEGORIES" ]; then
      printf 'CATEGORIES=%s\n' "$CATEGORIES"
    fi
    if [ "$INCLUDE_LISTA_ALBA" = "1" ]; then
      printf 'INCLUDE_LISTA_ALBA=1\n'
    fi
    printf 'ANAF_DATORNICI_LOG=%s\n' '/work/.log/anaf-datornici.log'
  } >> "$envfile"
}

append_env_knobs "$ENVF"
|
||||
|
||||
cd /opt/vreaudigital/services/seap-scraper

# Ensure /work/.log is writable inside container (host bind-mount); the
# Python process also tees to stdout → docker logs → journald.
mkdir -p .log

# Launch the scraper detached. Secrets reach the container only through the
# env-file, never through -e on the command line. The working directory is
# bind-mounted at /work and the process runs as the invoking uid:gid.
# NOTE(review): the in-container `pip install` presumably relies on that uid
# being root (the script is documented to run under sudo) — confirm before
# running it as an unprivileged user.
CID=$(docker run -d \
  --name vreaudigital-anaf-datornici-live \
  --network host \
  --env-file "$ENVF" \
  -v "$(pwd):/work" \
  -w /work \
  --user "$(id -u):$(id -g)" \
  --restart no \
  python:3.12-slim \
  bash -c "pip install --quiet --no-cache-dir psycopg2-binary requests && python3 scrapers/anaf_datornici/scraper.py")
log "container started: $CID"

# Short grace period before the secrets file is removed from the host.
sleep 3
rm -f -- "$ENVF"
log "envfile cleaned"
|
||||
|
||||
# Block until the scraper container stops, then read its exit status.
docker wait vreaudigital-anaf-datornici-live >/dev/null
EXIT_CODE=$(docker inspect -f '{{.State.ExitCode}}' vreaudigital-anaf-datornici-live 2>/dev/null || echo "?")

# Copy the last 30 lines of container output to the console and the log file.
# Best-effort: a failure here must not replace the scraper's own exit status.
docker logs vreaudigital-anaf-datornici-live 2>&1 | tail -30 | tee -a "$LOG" || true
log "=== ANAF datornici LIVE scrape done (exit=$EXIT_CODE) ==="

# Propagate the container's status. If it could not be determined (the "?"
# placeholder or any other non-numeric value), `exit` would reject the
# argument, so report a generic failure instead.
case "$EXIT_CODE" in
  '' | *[!0-9]*) exit 1 ;;
  *) exit "$EXIT_CODE" ;;
esac
|
||||
Reference in New Issue
Block a user