initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""XLSX/XLS → CSV converter for SEAP data.gov.ro yearly dumps.
|
||||
|
||||
Reads the first sheet, writes a UTF-8 CSV (comma + double-quote) so the
|
||||
existing SEAP normalizer (import-seap-historical.py) can ingest it.
|
||||
|
||||
Auto-detects file format:
|
||||
- XLSX (zip archive) → openpyxl
|
||||
- XLS (BIFF8 OLE) → xlrd 1.x
|
||||
|
||||
Usage: python3 xlsx-to-csv.py INPUT.{xlsx|xls} OUTPUT.csv
|
||||
"""
|
||||
import csv
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def is_xlsx(path: Path) -> bool:
|
||||
"""XLSX is a ZIP archive (PK header)."""
|
||||
with path.open("rb") as f:
|
||||
return f.read(2) == b"PK"
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if len(sys.argv) != 3:
|
||||
print(__doc__)
|
||||
sys.exit(2)
|
||||
src = Path(sys.argv[1])
|
||||
dst = Path(sys.argv[2])
|
||||
written = 0
|
||||
|
||||
if is_xlsx(src):
|
||||
import openpyxl
|
||||
wb = openpyxl.load_workbook(src, read_only=True, data_only=True)
|
||||
ws = wb.active
|
||||
with dst.open("w", encoding="utf-8", newline="") as f:
|
||||
w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
|
||||
for row in ws.iter_rows(values_only=True):
|
||||
out = []
|
||||
for v in row:
|
||||
if v is None:
|
||||
out.append("")
|
||||
elif isinstance(v, datetime):
|
||||
out.append(v.strftime("%m/%d/%Y %H:%M:%S"))
|
||||
elif isinstance(v, float) and v.is_integer():
|
||||
out.append(str(int(v)))
|
||||
else:
|
||||
out.append(str(v))
|
||||
w.writerow(out)
|
||||
written += 1
|
||||
else:
|
||||
# Legacy XLS via xlrd 1.x — concat ALL sheets (some big SEAP files use
|
||||
# multiple sheets due to the 65k row limit in old XLS format).
|
||||
import xlrd
|
||||
b = xlrd.open_workbook(str(src))
|
||||
wrote_header = False
|
||||
with dst.open("w", encoding="utf-8", newline="") as f:
|
||||
w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
|
||||
for sidx, sname in enumerate(b.sheet_names()):
|
||||
sh = b.sheet_by_index(sidx)
|
||||
if sh.nrows == 0:
|
||||
continue
|
||||
start = 0
|
||||
if wrote_header:
|
||||
start = 1 # skip repeated header on subsequent sheets
|
||||
else:
|
||||
wrote_header = True
|
||||
for ridx in range(start, sh.nrows):
|
||||
row = sh.row(ridx)
|
||||
out = []
|
||||
for cell in row:
|
||||
if cell.ctype == xlrd.XL_CELL_EMPTY or cell.ctype == xlrd.XL_CELL_BLANK:
|
||||
out.append("")
|
||||
elif cell.ctype == xlrd.XL_CELL_DATE:
|
||||
try:
|
||||
tup = xlrd.xldate_as_tuple(cell.value, b.datemode)
|
||||
out.append(datetime(*tup).strftime("%m/%d/%Y %H:%M:%S"))
|
||||
except Exception:
|
||||
out.append(str(cell.value))
|
||||
elif cell.ctype == xlrd.XL_CELL_NUMBER:
|
||||
v = cell.value
|
||||
if v == int(v):
|
||||
out.append(str(int(v)))
|
||||
else:
|
||||
out.append(str(v))
|
||||
else:
|
||||
out.append(str(cell.value))
|
||||
w.writerow(out)
|
||||
written += 1
|
||||
print(f"[xlsx2csv] {src.name} → {dst.name}: {written} rows", file=sys.stderr)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user