initial: split from gov-agreg — vreau.digital standalone platform

Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
2026-05-13 00:10:32 +03:00
commit a6c03a091e
352 changed files with 75295 additions and 0 deletions
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""AFIR XLSX → pipe-delimited TSV normalizer.
+
+Source: AFIR yearly listaplati XLSX (FEADR or FEGA), as published at
+https://www.afir.ro/rapoarte/beneficiari-de-fonduri-europene/date-deschise/
+
+The XLSX has 9 banner rows, then a 15-column header at row 10 (1-indexed),
+then ~470K-560K data rows. Schema (since 2023, identical for 2024):
+
+  Numele beneficiarului
+  Numele de familie al beneficiarului
+  Denumirea societatii-mama si codul de inregistrare fiscala
+  Localitate
+  Codul masurii/tipului de interventie
+  Obiectiv
+  Data inceperii
+  Data incheierii
+  Cuantum Operatiune FEGA
+  Cuantum Total FEGA
+  Cuantum Operatiune FEADR
+  Cuantum Total FEADR
+  Cuantum aferent operatiunii
+  Cuantum total cofinantare beneficiari
+  Cuantum total UE Beneficiar
+
+Output: pipe-delimited TSV (no quoting), in the same column order, suitable
+for `\\copy fonduri.staging_afir FROM ... WITH (FORMAT text, DELIMITER '|')`.
+
+Usage:
+  python3 import-afir-historical.py INPUT.xlsx OUTPUT.tsv
+
+Numeric columns are normalized: Romanian decimal "12.345,67" → "12345.67".
+Empty strings stay empty (NULL in COPY with NULL '').
+"""
+
+import sys
+import re
+
+import openpyxl
+
+EXPECTED_HEADER = "Numele beneficiarului"
+
+
+def norm_num(v):
+    if v is None:
+        return ""
+    if isinstance(v, (int, float)):
+        # Already numeric (rare for AFIR XLSX — values arrive as strings).
+        return f"{v:.2f}".replace("-0.00", "0.00")
+    s = str(v).strip()
+    if not s:
+        return ""
+    # Strip thousands "." and convert "," → "."
+    # AFIR uses Romanian format: 12.345,67  or  12345,67  or  0,00
+    if "," in s:
+        s = s.replace(".", "").replace(",", ".")
+    # Strip leading/trailing whitespace, replace any embedded pipe to be safe
+    return s.replace("|", "/")
+
+
+def norm_text(v):
+    if v is None:
+        return ""
+    s = str(v).strip()
+    if not s:
+        return ""
+    # COPY text format: tab and pipe collide with our delimiter; backslash needs escape.
+    # We chose pipe as delimiter — replace embedded pipes with "/".
+    # Newlines collapse to space.
+    s = s.replace("|", "/").replace("\t", " ").replace("\r", " ").replace("\n", " ")
+    s = re.sub(r"\s+", " ", s)
+    # Backslash escape for Postgres COPY text format
+    s = s.replace("\\", "\\\\")
+    return s
+
+
+def main():
+    if len(sys.argv) != 3:
+        print("usage: import-afir-historical.py INPUT.xlsx OUTPUT.tsv", file=sys.stderr)
+        sys.exit(2)
+    in_path, out_path = sys.argv[1], sys.argv[2]
+
+    wb = openpyxl.load_workbook(in_path, read_only=True, data_only=True)
+    ws = wb.active
+
+    rows = ws.iter_rows(values_only=True)
+    header_idx = None
+    for i, r in enumerate(rows):
+        if r and r[0] and EXPECTED_HEADER in str(r[0]):
+            header_idx = i
+            break
+        if i > 50:
+            break
+    if header_idx is None:
+        print("[afir-import] ERROR: header row not found in first 50 rows", file=sys.stderr)
+        sys.exit(1)
+
+    n_data = 0
+    n_skipped = 0
+
+    with open(out_path, "w", encoding="utf-8") as f:
+        for r in rows:
+            # 16 columns observed (last is None padding)
+            if r is None:
+                continue
+            cells = list(r) + [None] * (16 - len(r))
+            beneficiar = norm_text(cells[0])
+            if not beneficiar:
+                # Trailing empty rows
+                n_skipped += 1
+                continue
+
+            out = [
+                beneficiar,
+                norm_text(cells[1]),  # last_name
+                norm_text(cells[2]),  # mama_cui
+                norm_text(cells[3]),  # localitate
+                norm_text(cells[4]),  # cod_masura
+                norm_text(cells[5]),  # obiectiv
+                norm_text(cells[6]),  # data_start
+                norm_text(cells[7]),  # data_end
+                norm_num(cells[8]),   # fega_op
+                norm_num(cells[9]),   # fega_total
+                norm_num(cells[10]),  # feadr_op
+                norm_num(cells[11]),  # feadr_total
+                norm_num(cells[12]),  # op_amount
+                norm_num(cells[13]),  # cofinantare
+                norm_num(cells[14]),  # ue_total
+            ]
+            f.write("|".join(out) + "\n")
+            n_data += 1
+            if n_data % 50000 == 0:
+                print(f"[afir-import] wrote {n_data} rows", file=sys.stderr)
+
+    print(f"[afir-import] done — {n_data} rows, {n_skipped} skipped", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()