a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
96 lines
3.5 KiB
Python
96 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
|
"""XLSX/XLS → CSV converter for SEAP data.gov.ro yearly dumps.
|
|
|
|
Reads the first sheet, writes a UTF-8 CSV (comma + double-quote) so the
|
|
existing SEAP normalizer (import-seap-historical.py) can ingest it.
|
|
|
|
Auto-detects file format:
|
|
- XLSX (zip archive) → openpyxl
|
|
- XLS (BIFF8 OLE) → xlrd 1.x
|
|
|
|
Usage: python3 xlsx-to-csv.py INPUT.{xlsx|xls} OUTPUT.csv
|
|
"""
|
|
import csv
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
|
|
def is_xlsx(path: Path) -> bool:
|
|
"""XLSX is a ZIP archive (PK header)."""
|
|
with path.open("rb") as f:
|
|
return f.read(2) == b"PK"
|
|
|
|
|
|
def main() -> None:
|
|
if len(sys.argv) != 3:
|
|
print(__doc__)
|
|
sys.exit(2)
|
|
src = Path(sys.argv[1])
|
|
dst = Path(sys.argv[2])
|
|
written = 0
|
|
|
|
if is_xlsx(src):
|
|
import openpyxl
|
|
wb = openpyxl.load_workbook(src, read_only=True, data_only=True)
|
|
ws = wb.active
|
|
with dst.open("w", encoding="utf-8", newline="") as f:
|
|
w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
|
|
for row in ws.iter_rows(values_only=True):
|
|
out = []
|
|
for v in row:
|
|
if v is None:
|
|
out.append("")
|
|
elif isinstance(v, datetime):
|
|
out.append(v.strftime("%m/%d/%Y %H:%M:%S"))
|
|
elif isinstance(v, float) and v.is_integer():
|
|
out.append(str(int(v)))
|
|
else:
|
|
out.append(str(v))
|
|
w.writerow(out)
|
|
written += 1
|
|
else:
|
|
# Legacy XLS via xlrd 1.x — concat ALL sheets (some big SEAP files use
|
|
# multiple sheets due to the 65k row limit in old XLS format).
|
|
import xlrd
|
|
b = xlrd.open_workbook(str(src))
|
|
wrote_header = False
|
|
with dst.open("w", encoding="utf-8", newline="") as f:
|
|
w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
|
|
for sidx, sname in enumerate(b.sheet_names()):
|
|
sh = b.sheet_by_index(sidx)
|
|
if sh.nrows == 0:
|
|
continue
|
|
start = 0
|
|
if wrote_header:
|
|
start = 1 # skip repeated header on subsequent sheets
|
|
else:
|
|
wrote_header = True
|
|
for ridx in range(start, sh.nrows):
|
|
row = sh.row(ridx)
|
|
out = []
|
|
for cell in row:
|
|
if cell.ctype == xlrd.XL_CELL_EMPTY or cell.ctype == xlrd.XL_CELL_BLANK:
|
|
out.append("")
|
|
elif cell.ctype == xlrd.XL_CELL_DATE:
|
|
try:
|
|
tup = xlrd.xldate_as_tuple(cell.value, b.datemode)
|
|
out.append(datetime(*tup).strftime("%m/%d/%Y %H:%M:%S"))
|
|
except Exception:
|
|
out.append(str(cell.value))
|
|
elif cell.ctype == xlrd.XL_CELL_NUMBER:
|
|
v = cell.value
|
|
if v == int(v):
|
|
out.append(str(int(v)))
|
|
else:
|
|
out.append(str(v))
|
|
else:
|
|
out.append(str(cell.value))
|
|
w.writerow(out)
|
|
written += 1
|
|
print(f"[xlsx2csv] {src.name} → {dst.name}: {written} rows", file=sys.stderr)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|