fix: normalize Bosnia & Herzegovina and USA team name variants
Add TEAM_ALIASES to lib/wiki-scraper.ts, applied at extraction time so that both the scraper and sync consistently produce canonical names. Removes the duplicate alias map from seed.ts in favour of the shared normalizeTeam() export.
Aliases added:
- Bosnia & Herzegovina → Bosnia and Herzegovina
- USA → United States
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+15
-1
@@ -75,6 +75,20 @@ export async function fetchWikiHtml(page: string, retries = 5): Promise<string |
|
|||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Team name normalisation ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Known team-name variants mapped to the canonical name used everywhere else.
 * Keys are matched exactly (case- and whitespace-sensitive).
 */
const TEAM_ALIASES: Record<string, string> = {
  'West Germany': 'Germany',
  'Korea Republic': 'South Korea',
  'IR Iran': 'Iran',
  'Bosnia & Herzegovina': 'Bosnia and Herzegovina',
  'USA': 'United States',
}

/**
 * Returns the canonical team name for `name`.
 *
 * @param name - Team name as extracted; matched exactly against the alias table.
 * @returns The canonical name when `name` is a known alias, otherwise `name`
 *   unchanged (including the empty string).
 */
export function normalizeTeam(name: string): string {
  // Only consult own keys: a bare `TEAM_ALIASES[name]` would also resolve
  // inherited Object.prototype members (e.g. 'constructor', 'toString') and
  // hand back a function instead of a string.
  const alias = Object.prototype.hasOwnProperty.call(TEAM_ALIASES, name)
    ? TEAM_ALIASES[name]
    : undefined
  return alias ?? name
}
|
||||||
|
|
||||||
// ── Parsing helpers ────────────────────────────────────────────────────────
|
// ── Parsing helpers ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
function parseScoreText(text: string): [number, number] | null {
|
function parseScoreText(text: string): [number, number] | null {
|
||||||
@@ -92,7 +106,7 @@ function extractTeam($: CheerioAPI, $cell: Cheerio<Element>): string {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
return name
|
return normalizeTeam(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseGoals($: CheerioAPI, $td: Cheerio<Element>): Goal[] {
|
function parseGoals($: CheerioAPI, $td: Cheerio<Element>): Goal[] {
|
||||||
|
|||||||
+4
-13
@@ -5,6 +5,7 @@ import { readFileSync, existsSync } from 'fs'
|
|||||||
import path from 'path'
|
import path from 'path'
|
||||||
import { fileURLToPath } from 'url'
|
import { fileURLToPath } from 'url'
|
||||||
import { getIso } from '../lib/iso-codes'
|
import { getIso } from '../lib/iso-codes'
|
||||||
|
import { normalizeTeam } from '../lib/wiki-scraper'
|
||||||
|
|
||||||
const DATABASE_URL = process.env.DATABASE_URL ?? 'postgres://wc:wc@localhost:5432/worldcup'
|
const DATABASE_URL = process.env.DATABASE_URL ?? 'postgres://wc:wc@localhost:5432/worldcup'
|
||||||
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
||||||
@@ -15,16 +16,6 @@ const YEARS = [
|
|||||||
1978,1982,1986,1990,1994,1998,2002,2006,2010,2014,2018,2022,
|
1978,1982,1986,1990,1994,1998,2002,2006,2010,2014,2018,2022,
|
||||||
]
|
]
|
||||||
|
|
||||||
// Normalize team names from Wikipedia to canonical DB names
|
|
||||||
const TEAM_ALIASES: Record<string, string> = {
|
|
||||||
'West Germany': 'Germany',
|
|
||||||
'Korea Republic': 'South Korea',
|
|
||||||
'IR Iran': 'Iran',
|
|
||||||
}
|
|
||||||
|
|
||||||
function normTeam(name: string): string {
|
|
||||||
return TEAM_ALIASES[name] ?? name
|
|
||||||
}
|
|
||||||
|
|
||||||
function readJson<T>(filePath: string): T | null {
|
function readJson<T>(filePath: string): T | null {
|
||||||
if (!existsSync(filePath)) return null
|
if (!existsSync(filePath)) return null
|
||||||
@@ -174,7 +165,7 @@ async function run() {
|
|||||||
const teamCache = new Map<string, number>()
|
const teamCache = new Map<string, number>()
|
||||||
|
|
||||||
async function upsertTeam(rawName: string): Promise<number> {
|
async function upsertTeam(rawName: string): Promise<number> {
|
||||||
const name = normTeam(rawName)
|
const name = normalizeTeam(rawName)
|
||||||
if (teamCache.has(name)) return teamCache.get(name)!
|
if (teamCache.has(name)) return teamCache.get(name)!
|
||||||
const iso2 = getIso(name)
|
const iso2 = getIso(name)
|
||||||
const [row] = await db.execute(sql`
|
const [row] = await db.execute(sql`
|
||||||
@@ -207,8 +198,8 @@ async function run() {
|
|||||||
INSERT INTO tournaments (year, host, winner, runner_up, third_place, fourth_place, teams_count)
|
INSERT INTO tournaments (year, host, winner, runner_up, third_place, fourth_place, teams_count)
|
||||||
VALUES (
|
VALUES (
|
||||||
${year}, ${meta.host || null},
|
${year}, ${meta.host || null},
|
||||||
${normTeam(meta.winner ?? '') || null}, ${normTeam(meta.runner_up ?? '') || null},
|
${normalizeTeam(meta.winner ?? '') || null}, ${normalizeTeam(meta.runner_up ?? '') || null},
|
||||||
${normTeam(meta.third_place ?? '') || null}, ${normTeam(meta.fourth_place ?? '') || null},
|
${normalizeTeam(meta.third_place ?? '') || null}, ${normalizeTeam(meta.fourth_place ?? '') || null},
|
||||||
${meta.teams_count ?? null}
|
${meta.teams_count ?? null}
|
||||||
)
|
)
|
||||||
ON CONFLICT (year) DO UPDATE SET
|
ON CONFLICT (year) DO UPDATE SET
|
||||||
|
|||||||
Reference in New Issue
Block a user