initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
+167
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
"""APIA "Lista fermieri" XLSX → pipe-delimited TSV normalizer.
|
||||
|
||||
Source: data.gov.ro CKAN package "lista-fermierilor-campania-apia-2024".
|
||||
Currently a single resource (comuna Găgești, Vaslui, ~192 farmers), but the
|
||||
package is supposed to grow as more UATs publish their lists. The XLSX
|
||||
schema is set by APIA and identical across UATs:
|
||||
|
||||
Row 0 (header): NR.CRT | NUME PRENUME | RESPONSABIL UAT 2024
|
||||
| COMUNA/ORAS | SAT | DATE CONTACT | CENTRUL APIA
|
||||
| SUPRAFATA 2023 | (~17 None columns)
|
||||
Rows 1..N (data): one row per farmer, NR.CRT 1-indexed.
|
||||
|
||||
Output: pipe-delimited text, despite the .tsv name (no quoting), columns in this order:
|
||||
|
||||
campaign_year | name | comuna_oras | sat | centru_apia
|
||||
| responsabil_uat | suprafata_ha
|
||||
| source_dataset_id | source_resource_id | source_url
|
||||
|
||||
Empty strings stay empty (NULL in COPY with NULL '').
|
||||
|
||||
Usage:
|
||||
python3 import-apia-fermieri.py INPUT.xlsx OUTPUT.tsv \\
|
||||
CAMPAIGN_YEAR DATASET_ID RESOURCE_ID SOURCE_URL
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
import openpyxl
|
||||
|
||||
# Marker looked for (upper-cased, substring match) in the first cell of a row
# to recognise the header row.
EXPECTED_HEADER_COL0 = "NR.CRT"
# Leading word of the name column caption: "NUME PRENUME" or "NUME SI PRENUME".
EXPECTED_HEADER_COL1 = "NUME"
|
||||
|
||||
|
||||
def norm_text(v):
    """Return *v* as a single-line string that is safe inside a pipe-delimited row.

    ``None`` and blank values yield ``""``. Otherwise the value is stringified
    and stripped, embedded pipes become ``/``, every run of whitespace
    (including tabs and line breaks) collapses to one space, and backslashes
    are doubled.
    """
    text = "" if v is None else str(v).strip()
    if text == "":
        return ""
    # The pipe is the output delimiter, so it may not survive inside a field;
    # tabs and line breaks are flattened to spaces in the same pass.
    flattened = text.translate(
        str.maketrans({"|": "/", "\t": " ", "\r": " ", "\n": " "})
    )
    single_spaced = re.sub(r"\s+", " ", flattened)
    # Double backslashes last so nothing added above gets escaped twice.
    return single_spaced.replace("\\", "\\\\")
|
||||
|
||||
|
||||
def norm_num(v):
    """Return *v* as a plain decimal string for the numeric output column.

    * ``None`` and blank strings yield ``""``.
    * ``int``/``float`` values are formatted with at most four decimals and
      trailing zeros removed (``1.0400`` -> ``"1.04"``, ``12`` -> ``"12"``).
      A value that rounds to negative zero is emitted as ``"0"``.
    * Strings are stripped. If they contain a comma, the separator that
      occurs last is taken as the decimal mark: ``"1.234,56"`` (Romanian)
      and ``"1,234.56"`` (English) both become ``"1234.56"``. A lone comma
      is treated as the decimal mark (``"1,5"`` -> ``"1.5"``).

    String results are also made safe for the pipe-delimited output, the
    same way text fields are: pipes become ``/``, whitespace runs (tabs and
    line breaks included) collapse to one space, and backslashes are doubled.
    No check is made that a string result is actually numeric.
    """
    if v is None:
        return ""
    if isinstance(v, (int, float)):
        # Spreadsheet numerics already use "." as the decimal mark; only trim
        # the zero padding introduced by the fixed four-decimal format.
        s = f"{v:.4f}".rstrip("0").rstrip(".")
        return "0" if s in ("", "-0") else s
    s = str(v).strip()
    if not s:
        return ""
    if "," in s:
        if s.rfind(".") > s.rfind(","):
            # "1,234.56": commas are thousands separators.
            s = s.replace(",", "")
        else:
            # "1.234,56" or "1,5": dots are thousands separators, comma is
            # the decimal mark.
            s = s.replace(".", "").replace(",", ".")
    # A stray line break, tab, pipe or backslash must not split or corrupt
    # the output row.
    s = " ".join(s.replace("|", "/").split())
    return s.replace("\\", "\\\\")
|
||||
|
||||
|
||||
def _build_col_map(header_row):
    """Map logical field names to column indexes from the header row captions.

    Captions are stripped, upper-cased and stripped of diacritics before
    matching, so "COMUNĂ/ORAȘ" and "SUPRAFAȚA" match like their ASCII
    spellings. For "name" the first matching column wins; for every other
    field the last matching column wins.
    """
    import unicodedata  # function-scope import: the file's import block is left as-is

    col_map = {}
    for idx, cell in enumerate(header_row):
        if cell is None:
            continue
        decomposed = unicodedata.normalize("NFKD", str(cell).strip().upper())
        h = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
        if "NR.CRT" in h or "NRCRT" in h:
            col_map["nr"] = idx
        elif "NUME" in h:  # "NUME PRENUME" / "NUME SI PRENUME"
            col_map.setdefault("name", idx)
        elif "RESPONSABIL" in h:
            col_map["responsabil"] = idx
        elif "COMUNA" in h or "ORAS" in h:
            col_map["comuna"] = idx
        elif h == "SAT" or h.startswith("SAT "):
            col_map["sat"] = idx
        elif "CENTRU" in h:  # also covers "CENTRUL"
            col_map["centru"] = idx
        elif "SUPRAFATA" in h:
            col_map["suprafata"] = idx
    return col_map


def main():
    """Convert the APIA farmers XLSX named on the command line to a pipe-delimited file.

    Expects exactly six arguments: INPUT.xlsx OUTPUT.tsv CAMPAIGN_YEAR
    DATASET_ID RESOURCE_ID SOURCE_URL. The four trailing arguments are copied
    verbatim into every output row.

    Exits with status 2 on a wrong argument count and status 1 when no header
    row with a name column is found. Progress and errors go to stderr.
    """
    if len(sys.argv) != 7:
        print(
            "usage: import-apia-fermieri.py INPUT.xlsx OUTPUT.tsv "
            "CAMPAIGN_YEAR DATASET_ID RESOURCE_ID SOURCE_URL",
            file=sys.stderr,
        )
        sys.exit(2)

    in_path = sys.argv[1]
    out_path = sys.argv[2]
    campaign_year = sys.argv[3]
    dataset_id = sys.argv[4]
    resource_id = sys.argv[5]
    source_url = sys.argv[6]

    wb = openpyxl.load_workbook(in_path, read_only=True, data_only=True)
    try:
        ws = wb.active

        # One shared iterator: the data loop below resumes on the row right
        # after the header found here.
        rows = ws.iter_rows(values_only=True)
        header_idx = None
        col_map = None
        for i, r in enumerate(rows):
            if not r:
                continue
            if r[0] and EXPECTED_HEADER_COL0 in str(r[0]).upper():
                # Build column index map from header for resilience.
                col_map = _build_col_map(r)
                header_idx = i
                break
            if i > 50:
                break

        if header_idx is None or not col_map or "name" not in col_map:
            print(
                "[apia-import] ERROR: header row not found in first 50 rows",
                file=sys.stderr,
            )
            sys.exit(1)

        print(f"[apia-import] header at row {header_idx}, col_map={col_map}", file=sys.stderr)

        def cell(row, key):
            """Return the raw value of logical column *key*, or None if absent."""
            idx = col_map.get(key)
            # Rows can be shorter than the header; treat missing cells as empty.
            if idx is None or idx >= len(row):
                return None
            return row[idx]

        n_data = 0
        n_skipped = 0

        with open(out_path, "w", encoding="utf-8") as f:
            for r in rows:
                if r is None:
                    continue

                name = norm_text(cell(r, "name"))
                if not name:
                    # Rows without a name (blank/trailing rows) are not farmers.
                    n_skipped += 1
                    continue

                out = [
                    campaign_year,
                    name,
                    norm_text(cell(r, "comuna")),
                    norm_text(cell(r, "sat")),
                    norm_text(cell(r, "centru")),
                    norm_text(cell(r, "responsabil")),
                    norm_num(cell(r, "suprafata")),
                    dataset_id,
                    resource_id,
                    source_url,
                ]
                f.write("|".join(out) + "\n")
                n_data += 1
    finally:
        # Read-only workbooks keep the source file open until closed explicitly.
        wb.close()

    print(f"[apia-import] done — {n_data} rows, {n_skipped} skipped", file=sys.stderr)
|
||||
|
||||
|
||||
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user