a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
140 lines
4.4 KiB
Python
Executable File
140 lines
4.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""AFIR XLSX → pipe-delimited TSV normalizer.
|
|
|
|
Source: AFIR yearly listaplati XLSX (FEADR or FEGA), as published at
|
|
https://www.afir.ro/rapoarte/beneficiari-de-fonduri-europene/date-deschise/
|
|
|
|
The XLSX has 9 banner rows, then a 15-column header at row 10 (1-indexed),
|
|
then ~470K-560K data rows. Schema (since 2023, identical for 2024):
|
|
|
|
Numele beneficiarului
|
|
Numele de familie al beneficiarului
|
|
Denumirea societatii-mama si codul de inregistrare fiscala
|
|
Localitate
|
|
Codul masurii/tipului de interventie
|
|
Obiectiv
|
|
Data inceperii
|
|
Data incheierii
|
|
Cuantum Operatiune FEGA
|
|
Cuantum Total FEGA
|
|
Cuantum Operatiune FEADR
|
|
Cuantum Total FEADR
|
|
Cuantum aferent operatiunii
|
|
Cuantum total cofinantare beneficiari
|
|
Cuantum total UE Beneficiar
|
|
|
|
Output: pipe-delimited TSV (no quoting), in the same column order, suitable
|
|
for `\\copy fonduri.staging_afir FROM ... WITH (FORMAT text, DELIMITER '|')`.
|
|
|
|
Usage:
|
|
python3 import-afir-historical.py INPUT.xlsx OUTPUT.tsv
|
|
|
|
Numeric columns are normalized: Romanian decimal "12.345,67" → "12345.67".
|
|
Empty strings stay empty (NULL in COPY with NULL '').
|
|
"""
|
|
|
|
import sys
|
|
import re
|
|
|
|
import openpyxl
|
|
|
|
EXPECTED_HEADER = "Numele beneficiarului"
|
|
|
|
|
|
def norm_num(v):
|
|
if v is None:
|
|
return ""
|
|
if isinstance(v, (int, float)):
|
|
# Already numeric (rare for AFIR XLSX — values arrive as strings).
|
|
return f"{v:.2f}".replace("-0.00", "0.00")
|
|
s = str(v).strip()
|
|
if not s:
|
|
return ""
|
|
# Strip thousands "." and convert "," → "."
|
|
# AFIR uses Romanian format: 12.345,67 or 12345,67 or 0,00
|
|
if "," in s:
|
|
s = s.replace(".", "").replace(",", ".")
|
|
# Strip leading/trailing whitespace, replace any embedded pipe to be safe
|
|
return s.replace("|", "/")
|
|
|
|
|
|
def norm_text(v):
|
|
if v is None:
|
|
return ""
|
|
s = str(v).strip()
|
|
if not s:
|
|
return ""
|
|
# COPY text format: tab and pipe collide with our delimiter; backslash needs escape.
|
|
# We chose pipe as delimiter — replace embedded pipes with "/".
|
|
# Newlines collapse to space.
|
|
s = s.replace("|", "/").replace("\t", " ").replace("\r", " ").replace("\n", " ")
|
|
s = re.sub(r"\s+", " ", s)
|
|
# Backslash escape for Postgres COPY text format
|
|
s = s.replace("\\", "\\\\")
|
|
return s
|
|
|
|
|
|
def main():
|
|
if len(sys.argv) != 3:
|
|
print("usage: import-afir-historical.py INPUT.xlsx OUTPUT.tsv", file=sys.stderr)
|
|
sys.exit(2)
|
|
in_path, out_path = sys.argv[1], sys.argv[2]
|
|
|
|
wb = openpyxl.load_workbook(in_path, read_only=True, data_only=True)
|
|
ws = wb.active
|
|
|
|
rows = ws.iter_rows(values_only=True)
|
|
header_idx = None
|
|
for i, r in enumerate(rows):
|
|
if r and r[0] and EXPECTED_HEADER in str(r[0]):
|
|
header_idx = i
|
|
break
|
|
if i > 50:
|
|
break
|
|
if header_idx is None:
|
|
print("[afir-import] ERROR: header row not found in first 50 rows", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
n_data = 0
|
|
n_skipped = 0
|
|
|
|
with open(out_path, "w", encoding="utf-8") as f:
|
|
for r in rows:
|
|
# 16 columns observed (last is None padding)
|
|
if r is None:
|
|
continue
|
|
cells = list(r) + [None] * (16 - len(r))
|
|
beneficiar = norm_text(cells[0])
|
|
if not beneficiar:
|
|
# Trailing empty rows
|
|
n_skipped += 1
|
|
continue
|
|
|
|
out = [
|
|
beneficiar,
|
|
norm_text(cells[1]), # last_name
|
|
norm_text(cells[2]), # mama_cui
|
|
norm_text(cells[3]), # localitate
|
|
norm_text(cells[4]), # cod_masura
|
|
norm_text(cells[5]), # obiectiv
|
|
norm_text(cells[6]), # data_start
|
|
norm_text(cells[7]), # data_end
|
|
norm_num(cells[8]), # fega_op
|
|
norm_num(cells[9]), # fega_total
|
|
norm_num(cells[10]), # feadr_op
|
|
norm_num(cells[11]), # feadr_total
|
|
norm_num(cells[12]), # op_amount
|
|
norm_num(cells[13]), # cofinantare
|
|
norm_num(cells[14]), # ue_total
|
|
]
|
|
f.write("|".join(out) + "\n")
|
|
n_data += 1
|
|
if n_data % 50000 == 0:
|
|
print(f"[afir-import] wrote {n_data} rows", file=sys.stderr)
|
|
|
|
print(f"[afir-import] done — {n_data} rows, {n_skipped} skipped", file=sys.stderr)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|