diff --git a/lib/wiki-scraper.ts b/lib/wiki-scraper.ts index 78c596c..2636eb9 100644 --- a/lib/wiki-scraper.ts +++ b/lib/wiki-scraper.ts @@ -75,6 +75,20 @@ export async function fetchWikiHtml(page: string, retries = 5): Promise = { + 'West Germany': 'Germany', + 'Korea Republic': 'South Korea', + 'IR Iran': 'Iran', + 'Bosnia & Herzegovina': 'Bosnia and Herzegovina', + 'USA': 'United States', +} + +export function normalizeTeam(name: string): string { + return TEAM_ALIASES[name] ?? name +} + // ── Parsing helpers ──────────────────────────────────────────────────────── function parseScoreText(text: string): [number, number] | null { @@ -92,7 +106,7 @@ function extractTeam($: CheerioAPI, $cell: Cheerio): string { return false } }) - return name + return normalizeTeam(name) } function parseGoals($: CheerioAPI, $td: Cheerio): Goal[] { diff --git a/scripts/seed.ts b/scripts/seed.ts index 414f359..647e2e1 100644 --- a/scripts/seed.ts +++ b/scripts/seed.ts @@ -5,6 +5,7 @@ import { readFileSync, existsSync } from 'fs' import path from 'path' import { fileURLToPath } from 'url' import { getIso } from '../lib/iso-codes' +import { normalizeTeam } from '../lib/wiki-scraper' const DATABASE_URL = process.env.DATABASE_URL ?? 'postgres://wc:wc@localhost:5432/worldcup' const __dirname = path.dirname(fileURLToPath(import.meta.url)) @@ -15,16 +16,6 @@ const YEARS = [ 1978,1982,1986,1990,1994,1998,2002,2006,2010,2014,2018,2022, ] -// Normalize team names from Wikipedia to canonical DB names -const TEAM_ALIASES: Record = { - 'West Germany': 'Germany', - 'Korea Republic': 'South Korea', - 'IR Iran': 'Iran', -} - -function normTeam(name: string): string { - return TEAM_ALIASES[name] ?? name -} function readJson(filePath: string): T | null { if (!existsSync(filePath)) return null @@ -174,7 +165,7 @@ async function run() { const teamCache = new Map() async function upsertTeam(rawName: string): Promise { - const name = normTeam(rawName) + const name = normalizeTeam(rawName) if (teamCache.has(name)) return teamCache.get(name)! const iso2 = getIso(name) const [row] = await db.execute(sql` @@ -207,8 +198,8 @@ async function run() { INSERT INTO tournaments (year, host, winner, runner_up, third_place, fourth_place, teams_count) VALUES ( ${year}, ${meta.host || null}, - ${normTeam(meta.winner ?? '') || null}, ${normTeam(meta.runner_up ?? '') || null}, - ${normTeam(meta.third_place ?? '') || null}, ${normTeam(meta.fourth_place ?? '') || null}, + ${normalizeTeam(meta.winner ?? '') || null}, ${normalizeTeam(meta.runner_up ?? '') || null}, + ${normalizeTeam(meta.third_place ?? '') || null}, ${normalizeTeam(meta.fourth_place ?? '') || null}, ${meta.teams_count ?? null} ) ON CONFLICT (year) DO UPDATE SET