initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
+139
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
"""AFIR XLSX → pipe-delimited TSV normalizer.
|
||||
|
||||
Source: AFIR yearly listaplati XLSX (FEADR or FEGA), as published at
|
||||
https://www.afir.ro/rapoarte/beneficiari-de-fonduri-europene/date-deschise/
|
||||
|
||||
The XLSX has 9 banner rows, then a 15-column header at row 10 (1-indexed),
|
||||
then ~470K-560K data rows. Schema (since 2023, identical for 2024):
|
||||
|
||||
Numele beneficiarului
|
||||
Numele de familie al beneficiarului
|
||||
Denumirea societatii-mama si codul de inregistrare fiscala
|
||||
Localitate
|
||||
Codul masurii/tipului de interventie
|
||||
Obiectiv
|
||||
Data inceperii
|
||||
Data incheierii
|
||||
Cuantum Operatiune FEGA
|
||||
Cuantum Total FEGA
|
||||
Cuantum Operatiune FEADR
|
||||
Cuantum Total FEADR
|
||||
Cuantum aferent operatiunii
|
||||
Cuantum total cofinantare beneficiari
|
||||
Cuantum total UE Beneficiar
|
||||
|
||||
Output: pipe-delimited TSV (no quoting), in the same column order, suitable
|
||||
for `\\copy fonduri.staging_afir FROM ... WITH (FORMAT text, DELIMITER '|')`.
|
||||
|
||||
Usage:
|
||||
python3 import-afir-historical.py INPUT.xlsx OUTPUT.tsv
|
||||
|
||||
Numeric columns are normalized: Romanian decimal "12.345,67" → "12345.67".
|
||||
Empty strings stay empty (NULL in COPY with NULL '').
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
import openpyxl
|
||||
|
||||
EXPECTED_HEADER = "Numele beneficiarului"
|
||||
|
||||
|
||||
def norm_num(v):
|
||||
if v is None:
|
||||
return ""
|
||||
if isinstance(v, (int, float)):
|
||||
# Already numeric (rare for AFIR XLSX — values arrive as strings).
|
||||
return f"{v:.2f}".replace("-0.00", "0.00")
|
||||
s = str(v).strip()
|
||||
if not s:
|
||||
return ""
|
||||
# Strip thousands "." and convert "," → "."
|
||||
# AFIR uses Romanian format: 12.345,67 or 12345,67 or 0,00
|
||||
if "," in s:
|
||||
s = s.replace(".", "").replace(",", ".")
|
||||
# Strip leading/trailing whitespace, replace any embedded pipe to be safe
|
||||
return s.replace("|", "/")
|
||||
|
||||
|
||||
def norm_text(v):
|
||||
if v is None:
|
||||
return ""
|
||||
s = str(v).strip()
|
||||
if not s:
|
||||
return ""
|
||||
# COPY text format: tab and pipe collide with our delimiter; backslash needs escape.
|
||||
# We chose pipe as delimiter — replace embedded pipes with "/".
|
||||
# Newlines collapse to space.
|
||||
s = s.replace("|", "/").replace("\t", " ").replace("\r", " ").replace("\n", " ")
|
||||
s = re.sub(r"\s+", " ", s)
|
||||
# Backslash escape for Postgres COPY text format
|
||||
s = s.replace("\\", "\\\\")
|
||||
return s
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) != 3:
|
||||
print("usage: import-afir-historical.py INPUT.xlsx OUTPUT.tsv", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
in_path, out_path = sys.argv[1], sys.argv[2]
|
||||
|
||||
wb = openpyxl.load_workbook(in_path, read_only=True, data_only=True)
|
||||
ws = wb.active
|
||||
|
||||
rows = ws.iter_rows(values_only=True)
|
||||
header_idx = None
|
||||
for i, r in enumerate(rows):
|
||||
if r and r[0] and EXPECTED_HEADER in str(r[0]):
|
||||
header_idx = i
|
||||
break
|
||||
if i > 50:
|
||||
break
|
||||
if header_idx is None:
|
||||
print("[afir-import] ERROR: header row not found in first 50 rows", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
n_data = 0
|
||||
n_skipped = 0
|
||||
|
||||
with open(out_path, "w", encoding="utf-8") as f:
|
||||
for r in rows:
|
||||
# 16 columns observed (last is None padding)
|
||||
if r is None:
|
||||
continue
|
||||
cells = list(r) + [None] * (16 - len(r))
|
||||
beneficiar = norm_text(cells[0])
|
||||
if not beneficiar:
|
||||
# Trailing empty rows
|
||||
n_skipped += 1
|
||||
continue
|
||||
|
||||
out = [
|
||||
beneficiar,
|
||||
norm_text(cells[1]), # last_name
|
||||
norm_text(cells[2]), # mama_cui
|
||||
norm_text(cells[3]), # localitate
|
||||
norm_text(cells[4]), # cod_masura
|
||||
norm_text(cells[5]), # obiectiv
|
||||
norm_text(cells[6]), # data_start
|
||||
norm_text(cells[7]), # data_end
|
||||
norm_num(cells[8]), # fega_op
|
||||
norm_num(cells[9]), # fega_total
|
||||
norm_num(cells[10]), # feadr_op
|
||||
norm_num(cells[11]), # feadr_total
|
||||
norm_num(cells[12]), # op_amount
|
||||
norm_num(cells[13]), # cofinantare
|
||||
norm_num(cells[14]), # ue_total
|
||||
]
|
||||
f.write("|".join(out) + "\n")
|
||||
n_data += 1
|
||||
if n_data % 50000 == 0:
|
||||
print(f"[afir-import] wrote {n_data} rows", file=sys.stderr)
|
||||
|
||||
print(f"[afir-import] done — {n_data} rows, {n_skipped} skipped", file=sys.stderr)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user