initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
-- 039_bugetar_uat_pattern_match.sql
|
||||
-- High-precision CUI match for bugetar.entitate UAT entries by stripping
|
||||
-- ONRC parenthetical suffix "X (PRIMARIA Y)" and comparing normalized names.
|
||||
--
|
||||
-- Replaces the trgm-based 038 attempt which was too slow (90+ min, low yield)
|
||||
-- and hit false positives on the COMUNA-PRIMARIA naming gap.
|
||||
--
|
||||
-- Insight: ONRC stores comune/orașe with parenthetical suffix:
|
||||
-- "Comuna Surduc (Primaria Comunei Surduc)"
|
||||
-- "COMUNA CIZER (PRIMARIA)"
|
||||
-- "Comuna Mesesenii de Jos (Primaria Mesesenii de Jos Jud. Salaj)"
|
||||
-- Bugetar entries are clean: "COMUNA SURDUC". Stripping ' (...)' from ONRC
|
||||
-- name and comparing normalized → exact match → high-confidence resolve.
|
||||
--
|
||||
-- Idempotent: UPDATEs only WHERE cui IS NULL. Threshold-agnostic.
|
||||
-- Test runtime: ~1.7s per judet, 42 judete → ~70s total.
|
||||
|
||||
\timing on
|
||||
SET pg_trgm.similarity_threshold = 0.78; -- not used here but reset for safety
|
||||
|
||||
WITH judet_map AS (
|
||||
SELECT * FROM (VALUES
|
||||
('AB','ALBA'),('AG','ARGES'),('AR','ARAD'),('B','MUNICIPIUL BUCURESTI'),
|
||||
('BC','BACAU'),('BH','BIHOR'),('BN','BISTRITA-NASAUD'),('BR','BRAILA'),
|
||||
('BT','BOTOSANI'),('BV','BRASOV'),('BZ','BUZAU'),('CJ','CLUJ'),
|
||||
('CL','CALARASI'),('CS','CARAS-SEVERIN'),('CT','CONSTANTA'),('CV','COVASNA'),
|
||||
('DB','DAMBOVITA'),('DJ','DOLJ'),('GJ','GORJ'),('GL','GALATI'),
|
||||
('GR','GIURGIU'),('HD','HUNEDOARA'),('HR','HARGHITA'),('IF','ILFOV'),
|
||||
('IL','IALOMITA'),('IS','IASI'),('MH','MEHEDINTI'),('MM','MARAMURES'),
|
||||
('MS','MURES'),('NT','NEAMT'),('OT','OLT'),('PH','PRAHOVA'),
|
||||
('SB','SIBIU'),('SJ','SALAJ'),('SM','SATU MARE'),('SV','SUCEAVA'),
|
||||
('TL','TULCEA'),('TM','TIMIS'),('TR','TELEORMAN'),('VL','VALCEA'),
|
||||
('VN','VRANCEA'),('VS','VASLUI')
|
||||
) AS m(code, name_ascii)
|
||||
),
|
||||
candidates AS (
|
||||
SELECT DISTINCT ON (b.id) b.id, e.cui, e.name AS firm_name
|
||||
FROM bugetar.entitate b
|
||||
JOIN judet_map jm ON jm.code = b.judet
|
||||
JOIN firms.entities e
|
||||
ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) = firms.normalize_company_name(b.entity_name)
|
||||
AND UPPER(translate(COALESCE(e.adr_judet,''), 'ŞȘŢȚăâîĂÂÎ', 'SSTTAAIAAI')) = jm.name_ascii
|
||||
WHERE b.cui IS NULL
|
||||
ORDER BY b.id, e.cui -- deterministic when multiple firms share normalized stripped name
|
||||
)
|
||||
UPDATE bugetar.entitate b
|
||||
SET cui = c.cui,
|
||||
cui_match_score = 0.95::numeric(5,2), -- high-confidence stripped-exact match
|
||||
cui_match_method = 'uat_pattern',
|
||||
updated_at = now()
|
||||
FROM candidates c
|
||||
WHERE b.id = c.id AND b.cui IS NULL;
|
||||
|
||||
-- Final stats
|
||||
SELECT cui_match_method, count(*),
|
||||
round(avg(cui_match_score)::numeric, 2) AS avg_score
|
||||
FROM bugetar.entitate
|
||||
WHERE cui IS NOT NULL
|
||||
GROUP BY 1
|
||||
ORDER BY 2 DESC;
|
||||
|
||||
SELECT 'unmatched' AS t, count(*) FROM bugetar.entitate WHERE cui IS NULL;
|
||||
Reference in New Issue
Block a user