initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fast CUI → location resolver using ANAF dateidentificare bulk CSV.
|
||||
Reads 726MB CSV, matches against our 14K+ CUI list, updates DB.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
import sys
|
||||
import psycopg2
|
||||
from psycopg2.extras import execute_values
|
||||
|
||||
# PostgreSQL connection string, taken from the environment only.
# No credential fallback is embedded in source: DATABASE_URL must be set
# before running this script, otherwise DB_URL is None.
# NOTE(review): a DSN with a real password used to be the default here;
# that credential should be treated as leaked and rotated.
DB_URL = os.environ.get('DATABASE_URL')

# Bulk ANAF "dateidentificare" export, expected under ./data next to this script.
ANAF_CSV = os.path.join(os.path.dirname(__file__), 'data', 'dateidentificare2025.csv')
|
||||
|
||||
|
||||
def main():
    """Resolve CUIs to localities and SIRUTA codes, then rebuild the stats view.

    Pipeline (each step commits before the next one starts):
      1. collect the distinct CUIs referenced by seap.direct_acquisitions
         and seap.public_notices;
      2. make sure seap.cui_location (with its siruta column) exists;
      3. stream the ANAF CSV and upsert name/city/county for matching CUIs;
      4. fill seap.cui_location.siruta from public."GisUat" (exact, then fuzzy);
      5. copy the SIRUTA onto acquisition and notice rows;
      6. drop and recreate seap.uat_procurement_stats, then print a summary.

    Exits with an error message when no connection string is configured.
    The connection is always closed, including when a step raises.
    """
    if not DB_URL:
        sys.exit("DATABASE_URL is not set; cannot connect to the database")

    conn = psycopg2.connect(DB_URL)
    try:
        cur = conn.cursor()

        needed_cuis = _load_needed_cuis(cur)

        _ensure_location_table(cur)
        conn.commit()

        _import_anaf_locations(conn, cur, needed_cuis)

        _assign_siruta(cur)
        conn.commit()

        _propagate_siruta(cur)
        conn.commit()

        _rebuild_stats_view(cur)
        conn.commit()

        _print_summary(cur)
    finally:
        # Closing without a commit discards whatever the failed step left open.
        conn.close()


def _normalize_cui(raw):
    """Return *raw* as a bare digit string, or None if it is not a numeric CUI.

    Only a leading "RO" (any letter case, optionally followed by spaces) is
    removed; the letters are not stripped from the middle of a value.
    """
    text = str(raw).strip()
    if text[:2].upper() == 'RO':
        text = text[2:].strip()
    return text if text.isdigit() else None


def _load_needed_cuis(cur):
    """Step 1: return the set of normalized CUIs referenced by the SEAP tables."""
    print("Loading CUI list from DB...")
    cur.execute("""
        SELECT DISTINCT authority_cui FROM seap.direct_acquisitions
        WHERE authority_cui IS NOT NULL
        UNION
        SELECT DISTINCT supplier_cui FROM seap.direct_acquisitions
        WHERE supplier_cui IS NOT NULL
        UNION
        SELECT DISTINCT authority_cui FROM seap.public_notices
        WHERE authority_cui IS NOT NULL
    """)
    needed_cuis = set()
    for row in cur.fetchall():
        cui = _normalize_cui(row[0])
        if cui is not None:
            needed_cuis.add(cui)
    print(f" Need location for {len(needed_cuis)} CUIs")
    return needed_cuis


def _ensure_location_table(cur):
    """Step 2: create seap.cui_location if needed and add the siruta column."""
    cur.execute("""
        CREATE TABLE IF NOT EXISTS seap.cui_location (
            cui TEXT PRIMARY KEY,
            name TEXT,
            city TEXT,
            county TEXT,
            updated_at TIMESTAMPTZ DEFAULT now()
        )
    """)
    # Add siruta column if missing (tables created before it was introduced).
    cur.execute("ALTER TABLE seap.cui_location ADD COLUMN IF NOT EXISTS siruta TEXT")


def _import_anaf_locations(conn, cur, needed_cuis):
    """Step 3: stream the ANAF CSV and upsert rows whose CUI is in *needed_cuis*.

    Rows are written through _insert_batch in groups of 5000 and committed
    after each group, so a failure late in the file keeps earlier batches.
    Rows without a city value are skipped.

    Raises:
        ValueError: if the CSV has no header row.
    """
    batch_size = 5000

    print(f"Reading ANAF CSV: {ANAF_CSV}...")
    matched = 0
    batch = []
    line_count = 0

    # newline='' is what the csv module asks for so that it does its own
    # line-ending handling.
    with open(ANAF_CSV, 'r', encoding='iso-8859-16', errors='replace', newline='') as f:
        reader = csv.reader(f, delimiter='^')
        headers = next(reader, None)
        if headers is None:
            raise ValueError(f"ANAF CSV has no header row: {ANAF_CSV}")

        # Find column indices by header name; the numeric defaults are the
        # positions used when a header is not present.
        h_map = {h.strip().upper(): i for i, h in enumerate(headers)}
        cui_idx = h_map.get('COD_FISCAL', 0)
        name_idx = h_map.get('DENUMIRE', 1)
        city_idx = h_map.get('LOCALITATE', 5)
        county_idx = h_map.get('JUDET', 22)  # JUDET is col 22 (not JUDET_COMERT which is 13)

        print(f" Columns: CUI={cui_idx}, Name={name_idx}, City={city_idx}, County={county_idx}")
        print(f" Headers sample: {headers[:8]}")

        # Shortest row that still contains every column we read.
        min_fields = max(cui_idx, name_idx, city_idx, county_idx) + 1

        for row in reader:
            line_count += 1
            if line_count % 500000 == 0:
                print(f" Processed {line_count} lines, matched {matched}...")

            if len(row) < min_fields:
                continue

            cui = row[cui_idx].strip()
            if cui not in needed_cuis:
                continue

            name = row[name_idx].strip() if row[name_idx] else None
            city = row[city_idx].strip() if row[city_idx] else None
            county = row[county_idx].strip() if row[county_idx] else None

            if city:
                batch.append((cui, name, city, county))
                matched += 1

            if len(batch) >= batch_size:
                _insert_batch(cur, batch)
                conn.commit()
                batch = []

    if batch:
        _insert_batch(cur, batch)
        conn.commit()

    print(f"\n Total lines: {line_count}")
    print(f" Matched CUIs: {matched} / {len(needed_cuis)}")


def _assign_siruta(cur):
    """Step 4: fill seap.cui_location.siruta from public."GisUat".

    An exact pass on normalized locality + county runs first; rows still
    without a SIRUTA then take the best same-county candidate whose
    similarity() score is above 0.3.
    """
    print("\nMatching locations to SIRUTA...")

    # Exact match
    cur.execute("""
        UPDATE seap.cui_location cl
        SET siruta = u.siruta
        FROM public."GisUat" u
        WHERE cl.siruta IS NULL AND cl.city IS NOT NULL AND cl.county IS NOT NULL
        AND seap.normalize_locality(u.name) = seap.normalize_locality(cl.city)
        AND seap.normalize_locality(u.county) = seap.normalize_locality(cl.county)
    """)
    exact = cur.rowcount
    print(f" Exact match: {exact}")

    # Fuzzy match: DISTINCT ON + ORDER BY score DESC keeps one best row per CUI.
    cur.execute("""
        UPDATE seap.cui_location cl
        SET siruta = sub.siruta
        FROM (
            SELECT DISTINCT ON (cl2.cui)
            cl2.cui, u.siruta,
            similarity(seap.normalize_locality(u.name), seap.normalize_locality(cl2.city)) AS score
            FROM seap.cui_location cl2
            JOIN public."GisUat" u
            ON seap.normalize_locality(u.county) = seap.normalize_locality(cl2.county)
            WHERE cl2.siruta IS NULL AND cl2.city IS NOT NULL AND cl2.county IS NOT NULL
            AND similarity(seap.normalize_locality(u.name), seap.normalize_locality(cl2.city)) > 0.3
            ORDER BY cl2.cui, score DESC
        ) sub
        WHERE cl.cui = sub.cui
    """)
    fuzzy = cur.rowcount
    print(f" Fuzzy match: {fuzzy}")


def _propagate_siruta(cur):
    """Step 5: copy the resolved SIRUTA onto acquisition and notice rows.

    NOTE(review): the join compares the stored authority_cui with the
    normalized cui_location.cui, so values stored with an "RO" prefix would
    not match here — confirm how authority_cui is stored.
    """
    print("\nUpdating DA records with SIRUTA...")
    cur.execute("""
        UPDATE seap.direct_acquisitions da
        SET authority_siruta = cl.siruta
        FROM seap.cui_location cl
        WHERE da.authority_cui = cl.cui AND cl.siruta IS NOT NULL
        AND (da.authority_siruta IS NULL OR da.authority_siruta != cl.siruta)
    """)
    da_updated = cur.rowcount
    print(f" DA records updated: {da_updated}")

    cur.execute("""
        UPDATE seap.public_notices pn
        SET authority_siruta = cl.siruta
        FROM seap.cui_location cl
        WHERE pn.authority_cui = cl.cui AND cl.siruta IS NOT NULL
        AND (pn.authority_siruta IS NULL OR pn.authority_siruta != cl.siruta)
    """)
    pn_updated = cur.rowcount
    print(f" Notice records updated: {pn_updated}")


def _rebuild_stats_view(cur):
    """Step 6: drop and recreate seap.uat_procurement_stats with its unique index."""
    print("\nRefreshing materialized view...")
    cur.execute("DROP MATERIALIZED VIEW IF EXISTS seap.uat_procurement_stats")
    cur.execute("""
        CREATE MATERIALIZED VIEW seap.uat_procurement_stats AS
        SELECT
            u.siruta, u.name AS uat_name, u.county,
            COALESCE(da_s.da_count, 0)::bigint AS da_count,
            COALESCE(da_s.da_total_value, 0)::numeric AS da_total_value,
            COALESCE(pn_s.notice_count, 0)::bigint AS notice_count,
            COALESCE(pn_s.notice_total_value, 0)::numeric AS notice_total_value,
            (COALESCE(da_s.da_count, 0) + COALESCE(pn_s.notice_count, 0))::bigint AS total_contracts,
            (COALESCE(da_s.da_total_value, 0) + COALESCE(pn_s.notice_total_value, 0))::numeric AS total_value
        FROM public."GisUat" u
        LEFT JOIN (
            SELECT authority_siruta AS siruta, COUNT(*) AS da_count, SUM(closing_value) AS da_total_value
            FROM seap.direct_acquisitions WHERE authority_siruta IS NOT NULL
            GROUP BY authority_siruta
        ) da_s ON da_s.siruta = u.siruta
        LEFT JOIN (
            SELECT authority_siruta AS siruta, COUNT(*) AS notice_count, SUM(contract_value) AS notice_total_value
            FROM seap.public_notices WHERE authority_siruta IS NOT NULL
            GROUP BY authority_siruta
        ) pn_s ON pn_s.siruta = u.siruta
    """)
    cur.execute("CREATE UNIQUE INDEX idx_ups_siruta ON seap.uat_procurement_stats(siruta)")


def _print_summary(cur):
    """Print how many CUIs were located and how many UATs have any contracts."""
    cur.execute("SELECT COUNT(*) FROM seap.uat_procurement_stats WHERE total_contracts > 0")
    uats = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM seap.cui_location WHERE siruta IS NOT NULL")
    located = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM seap.cui_location")
    total_cui = cur.fetchone()[0]

    print("\n=== Done ===")
    print(f" CUI located: {located} / {total_cui}")
    print(f" UATs with data: {uats}")
|
||||
|
||||
|
||||
def _insert_batch(cur, batch):
    """Upsert a batch of (cui, name, city, county) tuples into seap.cui_location.

    Existing rows keep their current value for any column whose incoming
    value is NULL; updated_at is refreshed on every upsert.

    Rows sharing a CUI are merged before the statement is sent: PostgreSQL
    refuses an INSERT ... ON CONFLICT DO UPDATE that would affect the same
    row twice in one command, so a repeated CUI inside one batch would
    otherwise abort the whole load. The merge keeps the latest non-None
    value per column, which is the result sequential upserts would give.

    Args:
        cur: open database cursor the statement is executed on.
        batch: iterable of 4-tuples (cui, name, city, county).
    """
    merged = {}
    for cui, name, city, county in batch:
        earlier = merged.get(cui)
        if earlier is not None:
            _, prev_name, prev_city, prev_county = earlier
            name = prev_name if name is None else name
            city = prev_city if city is None else city
            county = prev_county if county is None else county
        merged[cui] = (cui, name, city, county)

    if not merged:
        return

    execute_values(cur, """
        INSERT INTO seap.cui_location (cui, name, city, county)
        VALUES %s
        ON CONFLICT (cui) DO UPDATE SET
            name = COALESCE(EXCLUDED.name, seap.cui_location.name),
            city = COALESCE(EXCLUDED.city, seap.cui_location.city),
            county = COALESCE(EXCLUDED.county, seap.cui_location.county),
            updated_at = now()
    """, list(merged.values()))
|
||||
|
||||
|
||||
# Run the full resolve-and-refresh pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user