a6c03a091e
Moved from gov-agreg/src/pages/achizitii/* to root (drop prefix). - 22 pages migrated, 127 files total - All internal links: /achizitii/X → /X (176 occurrences fixed) - AchizitiiLayout subnav rewritten: /X paths, top-right link to vreaudigital.ro hub - BaseLayout new (vreau.digital branding, OG tags, site URL) - astro.config.mjs: site https://vreau.digital, server output (was static) - docker-compose: port 5096 (vreaudigital is 5095), container vreau-digital - deploy.sh: paths /opt/vreau-digital, log /var/log/vreau-digital-deploy.log Backend shared with gov-agreg: - PostgreSQL satra (same schemas: seap, firms, anaf, anre, ...) - Photon, Martin tiles - Infisical /vreaudigital path (DATABASE_URL etc. shared) build: PASS (npx astro check 0 errors, npm run build 5s vite + 10s server)
102 lines
4.1 KiB
SQL
102 lines
4.1 KiB
SQL
-- 042_cnsc_authority_cui_match.sql
|
|
-- Backfill authority_cuis array on cnsc.decizii using the strip-parens +
|
|
-- UAT-pattern strategy proven by bugetar 039 + curteacont 040.
|
|
--
|
|
-- Current state: 29,488 decizii, 12,527 (42%) have authority_cuis populated
|
|
-- via the scraper's authority_cui_raw extraction. Remaining 16,961 (58%)
|
|
-- have authority_name but no CUI.
|
|
--
|
|
-- CNSC names use these patterns:
|
|
-- COMUNA X / ORASUL X / MUNICIPIUL X / JUDETUL X → UAT direct
|
|
-- PRIMARIA COMUNEI X / PRIMARIA X → strip PRIMARIA, try UAT
|
|
-- CONSILIUL JUDETEAN X / CJ X → "CONSILIUL JUDETEAN X" / "JUDETUL X"
|
|
-- <COMPANY NAME> SA / SRL → direct firm name match
|
|
-- <institution> — strip-parens fallback
|
|
--
|
|
-- This SQL UPDATEs authority_cuis = ARRAY[cui]::text[] when a match is found.
|
|
-- The mv_per_authority_cui must be refreshed afterward.
|
|
--
|
|
-- Idempotent: only updates rows where authority_cuis IS NULL or empty.
|
|
|
|
\timing on
|
|
|
|
-- Build the UAT firm cache once (reusable across passes)
|
|
DROP TABLE IF EXISTS tmp_firms_uat;
|
|
CREATE TEMP TABLE tmp_firms_uat AS
|
|
SELECT cui, name,
|
|
firms.normalize_company_name(regexp_replace(name, '\s*\(.*$', '')) AS norm_stripped
|
|
FROM firms.entities
|
|
WHERE name ~* '^(COMUNA |JUDETUL |ORAS |ORASUL |MUNICIPIUL |SECTOR(UL)? [1-6]|CONSILIUL JUDETEAN |PRIMARIA )';
|
|
CREATE INDEX ON tmp_firms_uat (norm_stripped);
|
|
ANALYZE tmp_firms_uat;
|
|
|
|
-- Unmatched authority rows with normalized expected forms
|
|
DROP TABLE IF EXISTS tmp_cnsc_unmatched;
|
|
CREATE TEMP TABLE tmp_cnsc_unmatched AS
|
|
SELECT id, authority_name,
|
|
-- Expected ONRC-form normalized name (try several patterns; pick best one):
|
|
firms.normalize_company_name(authority_name) AS direct_norm,
|
|
firms.normalize_company_name(
|
|
regexp_replace(authority_name, '^PRIMARIA\s+(COMUNEI\s+|ORASULUI\s+|MUNICIPIULUI\s+|JUDETULUI\s+)?', '', 'i')
|
|
) AS primaria_stripped_norm,
|
|
firms.normalize_company_name(
|
|
CASE WHEN authority_name ~* '^CONSILIUL JUDETEAN '
|
|
THEN 'JUDETUL ' || regexp_replace(authority_name, '^CONSILIUL JUDETEAN\s+', '', 'i')
|
|
ELSE NULL END
|
|
) AS cj_norm
|
|
FROM cnsc.decizii
|
|
WHERE (authority_cuis IS NULL OR array_length(authority_cuis, 1) IS NULL)
|
|
AND authority_name IS NOT NULL;
|
|
CREATE INDEX ON tmp_cnsc_unmatched (direct_norm);
|
|
|
|
-- Pass 1: direct strip-parens match (firm name matches CNSC authority_name)
|
|
WITH candidates AS (
|
|
SELECT DISTINCT ON (u.id) u.id, e.cui
|
|
FROM tmp_cnsc_unmatched u
|
|
JOIN firms.entities e
|
|
ON firms.normalize_company_name(regexp_replace(e.name, '\s*\(.*$', '')) = u.direct_norm
|
|
ORDER BY u.id, e.cui
|
|
)
|
|
UPDATE cnsc.decizii d
|
|
SET authority_cuis = ARRAY[c.cui]::text[]
|
|
FROM candidates c
|
|
WHERE d.id = c.id AND (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL);
|
|
|
|
-- Pass 2: PRIMARIA-stripped match (PRIMARIA COMUNEI X → match COMUNA X / ORAS X etc.)
|
|
WITH candidates AS (
|
|
SELECT DISTINCT ON (u.id) u.id, f.cui
|
|
FROM tmp_cnsc_unmatched u
|
|
JOIN tmp_firms_uat f ON f.norm_stripped = u.primaria_stripped_norm
|
|
WHERE u.primaria_stripped_norm != u.direct_norm -- only when PRIMARIA-strip changed the name
|
|
ORDER BY u.id, f.cui
|
|
)
|
|
UPDATE cnsc.decizii d
|
|
SET authority_cuis = ARRAY[c.cui]::text[]
|
|
FROM candidates c
|
|
WHERE d.id = c.id AND (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL);
|
|
|
|
-- Pass 3: CONSILIUL JUDETEAN X → JUDETUL X
|
|
WITH candidates AS (
|
|
SELECT DISTINCT ON (u.id) u.id, f.cui
|
|
FROM tmp_cnsc_unmatched u
|
|
JOIN tmp_firms_uat f ON f.norm_stripped = u.cj_norm
|
|
WHERE u.cj_norm IS NOT NULL
|
|
ORDER BY u.id, f.cui
|
|
)
|
|
UPDATE cnsc.decizii d
|
|
SET authority_cuis = ARRAY[c.cui]::text[]
|
|
FROM candidates c
|
|
WHERE d.id = c.id AND (d.authority_cuis IS NULL OR array_length(d.authority_cuis, 1) IS NULL);
|
|
|
|
-- Refresh the per-authority MV
|
|
REFRESH MATERIALIZED VIEW CONCURRENTLY cnsc.mv_per_authority_cui;
|
|
|
|
-- Final stats
|
|
SELECT
|
|
count(*) AS total,
|
|
count(*) FILTER (WHERE array_length(authority_cuis, 1) > 0) AS with_auth_cui,
|
|
round(100.0 * count(*) FILTER (WHERE array_length(authority_cuis, 1) > 0) / count(*), 1) AS pct
|
|
FROM cnsc.decizii;
|
|
|
|
DROP TABLE tmp_firms_uat, tmp_cnsc_unmatched;
|