initial: split from gov-agreg — vreau.digital standalone platform
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
This commit is contained in:
@@ -0,0 +1,101 @@
|
||||
-- 042_cnsc_authority_cui_match.sql
|
||||
-- Backfill authority_cuis array on cnsc.decizii using the strip-parens +
|
||||
-- UAT-pattern strategy proven by bugetar 039 + curteacont 040.
|
||||
--
|
||||
-- Current state: 29,488 decizii, 12,527 (42%) have authority_cuis populated
|
||||
-- via the scraper's authority_cui_raw extraction. Remaining 16,961 (58%)
|
||||
-- have authority_name but no CUI.
|
||||
--
|
||||
-- CNSC names use these patterns:
|
||||
-- COMUNA X / ORASUL X / MUNICIPIUL X / JUDETUL X → UAT direct
|
||||
-- PRIMARIA COMUNEI X / PRIMARIA X → strip PRIMARIA, try UAT
|
||||
-- CONSILIUL JUDETEAN X / CJ X → "CONSILIUL JUDETEAN X" / "JUDETUL X"
|
||||
-- <COMPANY NAME> SA / SRL → direct firm name match
|
||||
-- <institution> — strip-parens fallback
|
||||
--
|
||||
-- This SQL UPDATEs authority_cuis = ARRAY[cui]::text[] when a match is found.
|
||||
-- The mv_per_authority_cui must be refreshed afterward.
|
||||
--
|
||||
-- Idempotent: only updates rows where authority_cuis IS NULL or empty.
|
||||
|
||||
\timing on
|
||||
|
||||
-- Build the UAT firm cache once (reusable across passes)
|
||||
DROP TABLE IF EXISTS tmp_firms_uat;
|
||||
CREATE TEMP TABLE tmp_firms_uat AS
|
||||
SELECT cui, name,
|
||||
firms.normalize_company_name(regexp_replace(name, '\s*\(.*$', '')) AS norm_stripped
|
||||
FROM firms.entities
|
||||
WHERE name ~* '^(COMUNA |JUDETUL |ORAS |ORASUL |MUNICIPIUL |SECTOR(UL)? [1-6]|CONSILIUL JUDETEAN |PRIMARIA )';
|
||||
CREATE INDEX ON tmp_firms_uat (norm_stripped);
|
||||
ANALYZE tmp_firms_uat;
|
||||
|
||||
-- Unmatched authority rows with normalized expected forms
|
||||
DROP TABLE IF EXISTS tmp_cnsc_unmatched;
|
||||
CREATE TEMP TABLE tmp_cnsc_unmatched AS
|
||||
SELECT id, authority_name,
|
||||
-- Expected ONRC-form normalized name (try several patterns; pick best one):
|
||||
firms.normalize_company_name(authority_name) AS direct_norm,
|
||||
firms.normalize_company_name(
|
||||
regexp_replace(authority_name, '^PRIMARIA\s+(COMUNEI\s+|ORASULUI\s+|MUNICIPIULUI\s+|JUDETULUI\s+)?', '', 'i')
|
||||
) AS primaria_stripped_norm,
|
||||
firms.normalize_company_name(
|
||||
CASE WHEN authority_name ~* '^CONSILIUL JUDETEAN '
|
||||
THEN 'JUDETUL ' || regexp_replace(authority_name, '^CONSILIUL JUDETEAN\s+', '', 'i')
|
||||
ELSE NULL END
|
||||
) AS cj_norm
|
||||
FROM cnsc.decizii
|
||||
WHERE (authority_cuis IS NULL OR array_length(authority_cuis, 1) IS NULL)
|
||||
AND authority_name IS NOT NULL;
|
||||
CREATE INDEX ON tmp_cnsc_unmatched (direct_norm);
|
||||
|
||||
-- Pass 1: direct strip-parens match (firm name matches CNSC authority_name)
|
||||
WITH candidates AS (
|
||||
SELECT DISTINCT ON (u.id) u.id, e.cui
|
||||
FROM tmp_cnsc_unmatched u
|
||||
JOIN firms.entities e
|
||||
ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) = u.direct_norm
|
||||
ORDER BY u.id, e.cui
|
||||
)
|
||||
UPDATE cnsc.decizii d
|
||||
SET authority_cuis = ARRAY[c.cui]::text[]
|
||||
FROM candidates c
|
||||
WHERE d.id = c.id AND (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL);
|
||||
|
||||
-- Pass 2: PRIMARIA-stripped match (PRIMARIA COMUNEI X → match COMUNA X / ORAS X etc.)
|
||||
WITH candidates AS (
|
||||
SELECT DISTINCT ON (u.id) u.id, f.cui
|
||||
FROM tmp_cnsc_unmatched u
|
||||
JOIN tmp_firms_uat f ON f.norm_stripped = u.primaria_stripped_norm
|
||||
WHERE u.primaria_stripped_norm != u.direct_norm -- only when PRIMARIA-strip changed the name
|
||||
ORDER BY u.id, f.cui
|
||||
)
|
||||
UPDATE cnsc.decizii d
|
||||
SET authority_cuis = ARRAY[c.cui]::text[]
|
||||
FROM candidates c
|
||||
WHERE d.id = c.id AND (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL);
|
||||
|
||||
-- Pass 3: CONSILIUL JUDETEAN X → JUDETUL X
|
||||
WITH candidates AS (
|
||||
SELECT DISTINCT ON (u.id) u.id, f.cui
|
||||
FROM tmp_cnsc_unmatched u
|
||||
JOIN tmp_firms_uat f ON f.norm_stripped = u.cj_norm
|
||||
WHERE u.cj_norm IS NOT NULL
|
||||
ORDER BY u.id, f.cui
|
||||
)
|
||||
UPDATE cnsc.decizii d
|
||||
SET authority_cuis = ARRAY[c.cui]::text[]
|
||||
FROM candidates c
|
||||
WHERE d.id = c.id AND (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL);
|
||||
|
||||
-- Refresh the per-authority MV
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
|
||||
|
||||
-- Final stats
|
||||
SELECT
|
||||
count(*) AS total,
|
||||
count(*) FILTER (WHERE array_length(authority_cuis, 1) > 0) AS with_auth_cui,
|
||||
round(100.0 * count(*) FILTER (WHERE array_length(authority_cuis, 1) > 0) / count(*), 1) AS pct
|
||||
FROM cnsc.decizii;
|
||||
|
||||
DROP TABLE tmp_firms_uat, tmp_cnsc_unmatched;
|
||||
Reference in New Issue
Block a user