import postgres from 'postgres'
import { drizzle } from 'drizzle-orm/postgres-js'
import { sql } from 'drizzle-orm'
import { readFileSync, existsSync } from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'
import { getIso } from '../lib/iso-codes'
import { normalizeTeam } from '../lib/wiki-scraper'

const DATABASE_URL = process.env.DATABASE_URL ?? 'postgres://wc:wc@localhost:5432/worldcup'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const WC_DIR = path.join(__dirname, '../data')

/** Tournament years that are seeded; each one maps to a sub-directory of WC_DIR. */
const YEARS = [
  1930, 1934, 1938, 1950, 1954, 1958, 1962, 1966, 1970, 1974,
  1978, 1982, 1986, 1990, 1994, 1998, 2002, 2006, 2010, 2014, 2018, 2022,
]

/**
 * Reads and parses a JSON file.
 *
 * @param filePath Path of the file to read.
 * @returns The parsed content cast to `T` (the shape is NOT validated), or
 *   `null` when the file does not exist or cannot be read/parsed.
 */
function readJson<T>(filePath: string): T | null {
  if (!existsSync(filePath)) return null
  try {
    return JSON.parse(readFileSync(filePath, 'utf-8')) as T
  } catch (err: unknown) {
    // Still best-effort (callers treat null as "no data"), but make a corrupt
    // file visible instead of silently looking like a missing one.
    console.warn(`  warning: could not read/parse ${filePath}:`, err instanceof Error ? err.message : err)
    return null
  }
}

// ── Types matching scrape-wikipedia.ts output ──────────────────────────────
type RawGoal = { name: string; minute?: string | number; offset?: number; penalty?: boolean; owngoal?: boolean }
type RawScore = { ft?: number[]; ht?: number[]; et?: number[]; p?: number[] }
type RawMatch = {
  round?: string
  date?: string
  time?: string
  team1: string
  team2: string
  score?: RawScore
  goals1?: RawGoal[]
  goals2?: RawGoal[]
  group?: string
  ground?: string
}
type RawData = { matches: RawMatch[] }
type RawMeta = {
  host: string
  teams_count: number | null
  winner: string | null
  runner_up: string | null
  third_place: string | null
  fourth_place: string | null
}
type RawStadiums = {
  stadiums: { name: string; city: string; cc?: string; capacity?: number; timezone?: string; coords?: string }[]
}
type RawSquad = { name: string; players: { name: string; number?: number; pos?: string; date_of_birth?: string }[] }

/**
 * Normalizes a raw score into its optional ft/ht/et/p pairs.
 *
 * A bare array is accepted as well and is treated as the full-time score.
 *
 * @param score Raw score from the match JSON, possibly undefined.
 * @returns An object whose `ft`, `ht`, `et` and `p` entries may each be undefined.
 */
function parseScore(score: RawScore | undefined): RawScore {
  if (!score) return {}
  if (Array.isArray(score)) return { ft: score as number[] }
  return { ft: score.ft, ht: score.ht, et: score.et, p: score.p }
}

/**
 * Converts a raw goal minute to an integer.
 *
 * @param raw Minute as found in the JSON (number or string).
 * @returns The parsed minute, or `null` when it is absent, unparsable, or 0.
 */
function parseGoalMinute(raw: string | number | undefined): number | null {
  if (raw == null) return null
  const parsed = parseInt(String(raw), 10)
  // NaN and 0 are both falsy and are both stored as NULL.
  return parsed ? parsed : null
}

/**
 * Creates the schema if needed and seeds the historical tournaments listed in
 * YEARS from the per-year JSON files under WC_DIR, then derives group
 * standings and per-tournament aggregates from the imported rows.
 *
 * Skips the import when historical tournament rows already exist, unless
 * `--force` / `-f` is passed, in which case historical rows are deleted first.
 * The database client is always closed, including when a statement fails.
 */
async function run(): Promise<void> {
  const client = postgres(DATABASE_URL, { max: 5 })
  const db = drizzle(client)

  try {
    // Create tables
    await db.execute(sql`
      CREATE TABLE IF NOT EXISTS tournaments (
        year INTEGER PRIMARY KEY,
        host TEXT NOT NULL,
        winner TEXT,
        runner_up TEXT,
        third_place TEXT,
        fourth_place TEXT,
        teams_count INTEGER,
        matches_count INTEGER,
        total_goals INTEGER,
        avg_goals_per_game NUMERIC(4,2)
      );
      CREATE TABLE IF NOT EXISTS teams (
        id SERIAL PRIMARY KEY,
        name TEXT UNIQUE NOT NULL,
        iso2 TEXT,
        fifa_code TEXT,
        continent TEXT,
        confederation TEXT
      );
      CREATE TABLE IF NOT EXISTS stadiums (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER,
        name TEXT NOT NULL,
        city TEXT,
        country_code TEXT,
        capacity INTEGER,
        timezone TEXT,
        coordinates TEXT
      );
      CREATE TABLE IF NOT EXISTS matches (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER NOT NULL,
        round TEXT NOT NULL,
        group_name TEXT,
        date DATE,
        time_local TEXT,
        stadium_id INTEGER,
        team1_id INTEGER NOT NULL,
        team2_id INTEGER NOT NULL,
        score_ft_home INTEGER,
        score_ft_away INTEGER,
        score_ht_home INTEGER,
        score_ht_away INTEGER,
        score_et_home INTEGER,
        score_et_away INTEGER,
        score_p_home INTEGER,
        score_p_away INTEGER,
        is_quali_playoff BOOLEAN DEFAULT false
      );
      CREATE UNIQUE INDEX IF NOT EXISTS matches_unique
        ON matches (tournament_year, team1_id, team2_id, date, is_quali_playoff);
      CREATE TABLE IF NOT EXISTS goals (
        id SERIAL PRIMARY KEY,
        match_id INTEGER NOT NULL,
        team_id INTEGER NOT NULL,
        player_name TEXT NOT NULL,
        minute INTEGER,
        minute_offset INTEGER DEFAULT 0,
        is_penalty BOOLEAN DEFAULT false,
        is_own_goal BOOLEAN DEFAULT false
      );
      CREATE TABLE IF NOT EXISTS group_standings (
        tournament_year INTEGER NOT NULL,
        group_name TEXT NOT NULL,
        team_id INTEGER NOT NULL,
        pos INTEGER,
        played INTEGER DEFAULT 0,
        won INTEGER DEFAULT 0,
        drawn INTEGER DEFAULT 0,
        lost INTEGER DEFAULT 0,
        goals_for INTEGER DEFAULT 0,
        goals_against INTEGER DEFAULT 0,
        goal_diff INTEGER DEFAULT 0,
        pts INTEGER DEFAULT 0,
        PRIMARY KEY (tournament_year, group_name, team_id)
      );
      CREATE TABLE IF NOT EXISTS squads (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER NOT NULL,
        team_id INTEGER NOT NULL,
        player_name TEXT NOT NULL,
        shirt_number INTEGER,
        position TEXT,
        date_of_birth DATE
      );
      CREATE UNIQUE INDEX IF NOT EXISTS squads_unique
        ON squads (tournament_year, team_id, shirt_number);
    `)

    const force = process.argv.includes('--force') || process.argv.includes('-f')

    if (!force) {
      const existing = await db.execute(sql`SELECT COUNT(*)::int AS cnt FROM tournaments WHERE year < 2026`)
      if ((existing[0] as { cnt: number }).cnt > 0) {
        console.log('✓ Already seeded (historical data present), skipping. Use --force to re-import.')
        return
      }
    } else {
      console.log('--force: clearing historical data...')
      // Goals are removed first because they are located through their match rows.
      await db.execute(sql`DELETE FROM goals WHERE match_id IN (SELECT id FROM matches WHERE tournament_year < 2026)`)
      await db.execute(sql`DELETE FROM squads WHERE tournament_year < 2026`)
      await db.execute(sql`DELETE FROM group_standings WHERE tournament_year < 2026`)
      await db.execute(sql`DELETE FROM stadiums WHERE tournament_year < 2026`)
      await db.execute(sql`DELETE FROM matches WHERE tournament_year < 2026`)
      await db.execute(sql`DELETE FROM tournaments WHERE year < 2026`)
    }

    console.log('Seeding historical data (1930–2022)...')

    // Normalized team name -> teams.id, so each team is upserted once per run.
    const teamCache = new Map<string, number>()

    /** Returns the id of the team with the given (raw) name, inserting it if needed. */
    async function upsertTeam(rawName: string): Promise<number> {
      const name = normalizeTeam(rawName)
      const cached = teamCache.get(name)
      if (cached !== undefined) return cached

      const iso2 = getIso(name)
      // The no-op "SET name = EXCLUDED.name" makes RETURNING yield the id of an
      // already existing row as well.
      const [row] = await db.execute(sql`
        INSERT INTO teams (name, iso2) VALUES (${name}, ${iso2 ?? null})
        ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name
        RETURNING id
      `)
      const id = (row as { id: number }).id
      teamCache.set(name, id)
      return id
    }

    // Per-year data from Wikipedia JSON files
    let totalMatches = 0
    let totalGoals = 0

    for (const year of YEARS) {
      const yearDir = path.join(WC_DIR, String(year))
      const mainData = readJson<RawData>(path.join(yearDir, 'worldcup.json'))
      if (!mainData?.matches) {
        console.log(`  ${year}: no data file, skipping`)
        continue
      }

      // Tournament row from meta.json
      // NOTE(review): host is NOT NULL in the schema, so an empty meta.host
      // (mapped to NULL here) makes this insert fail.
      const meta = readJson<RawMeta>(path.join(yearDir, 'worldcup.meta.json'))
      if (meta) {
        await db.execute(sql`
          INSERT INTO tournaments (year, host, winner, runner_up, third_place, fourth_place, teams_count)
          VALUES (
            ${year},
            ${meta.host || null},
            ${normalizeTeam(meta.winner ?? '') || null},
            ${normalizeTeam(meta.runner_up ?? '') || null},
            ${normalizeTeam(meta.third_place ?? '') || null},
            ${normalizeTeam(meta.fourth_place ?? '') || null},
            ${meta.teams_count ?? null}
          )
          ON CONFLICT (year) DO UPDATE SET
            host = EXCLUDED.host,
            winner = EXCLUDED.winner,
            runner_up = EXCLUDED.runner_up,
            third_place = EXCLUDED.third_place,
            fourth_place = EXCLUDED.fourth_place,
            teams_count = EXCLUDED.teams_count
        `)
      }

      let matchCount = 0
      let goalCount = 0

      // Stadiums (only name and city are persisted).
      // NOTE(review): the stadiums table as created above has no unique
      // constraint besides its serial id, so ON CONFLICT DO NOTHING never
      // suppresses a duplicate row.
      const stadiumsData = readJson<RawStadiums>(path.join(yearDir, 'worldcup.stadiums.json'))
      for (const s of stadiumsData?.stadiums ?? []) {
        await db.execute(sql`
          INSERT INTO stadiums (tournament_year, name, city)
          VALUES (${year}, ${s.name}, ${s.city ?? null})
          ON CONFLICT DO NOTHING
        `)
      }

      // Matches and goals
      for (const m of mainData.matches) {
        const t1Id = await upsertTeam(m.team1)
        const t2Id = await upsertTeam(m.team2)
        const score = parseScore(m.score)

        // NOTE(review): `date` is nullable and part of the unique index; rows
        // with a NULL date never conflict with each other in PostgreSQL.
        const [matchRow] = await db.execute(sql`
          INSERT INTO matches (
            tournament_year, round, group_name, date, time_local,
            team1_id, team2_id,
            score_ft_home, score_ft_away,
            score_ht_home, score_ht_away,
            score_et_home, score_et_away,
            score_p_home, score_p_away,
            is_quali_playoff
          ) VALUES (
            ${year}, ${m.round ?? 'Unknown'}, ${m.group ?? null}, ${m.date ?? null}, ${m.time ?? null},
            ${t1Id}, ${t2Id},
            ${score.ft?.[0] ?? null}, ${score.ft?.[1] ?? null},
            ${score.ht?.[0] ?? null}, ${score.ht?.[1] ?? null},
            ${score.et?.[0] ?? null}, ${score.et?.[1] ?? null},
            ${score.p?.[0] ?? null}, ${score.p?.[1] ?? null},
            false
          )
          ON CONFLICT (tournament_year, team1_id, team2_id, date, is_quali_playoff) DO UPDATE SET
            round = EXCLUDED.round,
            group_name = COALESCE(EXCLUDED.group_name, matches.group_name),
            time_local = COALESCE(EXCLUDED.time_local, matches.time_local),
            score_ft_home = COALESCE(EXCLUDED.score_ft_home, matches.score_ft_home),
            score_ft_away = COALESCE(EXCLUDED.score_ft_away, matches.score_ft_away),
            score_ht_home = COALESCE(EXCLUDED.score_ht_home, matches.score_ht_home),
            score_ht_away = COALESCE(EXCLUDED.score_ht_away, matches.score_ht_away),
            score_et_home = COALESCE(EXCLUDED.score_et_home, matches.score_et_home),
            score_et_away = COALESCE(EXCLUDED.score_et_away, matches.score_et_away),
            score_p_home = COALESCE(EXCLUDED.score_p_home, matches.score_p_home),
            score_p_away = COALESCE(EXCLUDED.score_p_away, matches.score_p_away)
          RETURNING id
        `)
        const matchId = (matchRow as { id: number }).id

        // Goals (delete + re-insert)
        await db.execute(sql`DELETE FROM goals WHERE match_id = ${matchId}`)

        // [goals listed for a side, that side's team id, the other side's team id]
        const goalSides: [RawGoal[], number, number][] = [
          [m.goals1 ?? [], t1Id, t2Id],
          [m.goals2 ?? [], t2Id, t1Id],
        ]
        for (const [rawGoals, teamId, ogTeamId] of goalSides) {
          for (const g of rawGoals) {
            if (!g.name) continue
            const minute = parseGoalMinute(g.minute)
            // Own goals are stored against the other side's team id.
            const actualTeamId = g.owngoal ? ogTeamId : teamId
            await db.execute(sql`
              INSERT INTO goals (match_id, team_id, player_name, minute, minute_offset, is_penalty, is_own_goal)
              VALUES (${matchId}, ${actualTeamId}, ${g.name}, ${minute},
                      ${g.offset ?? 0}, ${g.penalty ?? false}, ${g.owngoal ?? false})
            `)
            goalCount++
          }
        }
        matchCount++
      }

      // Squads
      const squadsData = readJson<RawSquad[]>(path.join(yearDir, 'worldcup.squads.json'))
      if (Array.isArray(squadsData)) {
        for (const sq of squadsData) {
          const teamId = await upsertTeam(sq.name)
          for (const p of sq.players) {
            if (!p.name) continue
            // Strip all whitespace from the date string before handing it to the DATE column.
            const dob = p.date_of_birth ? p.date_of_birth.replace(/\s/g, '') : null
            await db.execute(sql`
              INSERT INTO squads (tournament_year, team_id, player_name, shirt_number, position, date_of_birth)
              VALUES (${year}, ${teamId}, ${p.name}, ${p.number ?? null}, ${p.pos ?? null}, ${dob})
              ON CONFLICT (tournament_year, team_id, shirt_number) DO UPDATE SET
                player_name = EXCLUDED.player_name,
                position = EXCLUDED.position,
                date_of_birth = EXCLUDED.date_of_birth
            `)
          }
        }
      }

      console.log(`  ${year}: ${matchCount} matches, ${goalCount} goals`)
      totalMatches += matchCount
      totalGoals += goalCount
    }

    // 3. Group standings (computed from match results)
    // Each match contributes one row per team (UNION ALL of both perspectives).
    // Points: a win is worth 2 points before the 1994 tournament and 3 from
    // 1994 on, matching the rules in force at each World Cup.
    console.log('Computing group standings...')
    await db.execute(sql`
      DELETE FROM group_standings WHERE tournament_year < 2026
    `)
    await db.execute(sql`
      INSERT INTO group_standings
        (tournament_year, group_name, team_id, played, won, drawn, lost,
         goals_for, goals_against, goal_diff, pts)
      WITH match_results AS (
        SELECT tournament_year, group_name, team1_id AS team_id,
               score_ft_home AS gf, score_ft_away AS ga
        FROM matches
        WHERE tournament_year < 2026 AND group_name IS NOT NULL
          AND is_quali_playoff = false AND score_ft_home IS NOT NULL
        UNION ALL
        SELECT tournament_year, group_name, team2_id, score_ft_away, score_ft_home
        FROM matches
        WHERE tournament_year < 2026 AND group_name IS NOT NULL
          AND is_quali_playoff = false AND score_ft_home IS NOT NULL
      )
      SELECT
        tournament_year, group_name, team_id,
        COUNT(*)::int,
        SUM(CASE WHEN gf > ga THEN 1 ELSE 0 END)::int,
        SUM(CASE WHEN gf = ga THEN 1 ELSE 0 END)::int,
        SUM(CASE WHEN gf < ga THEN 1 ELSE 0 END)::int,
        SUM(gf)::int,
        SUM(ga)::int,
        SUM(gf - ga)::int,
        SUM(CASE
              WHEN gf > ga THEN CASE WHEN tournament_year >= 1994 THEN 3 ELSE 2 END
              WHEN gf = ga THEN 1
              ELSE 0
            END)::int
      FROM match_results
      GROUP BY tournament_year, group_name, team_id
      ON CONFLICT (tournament_year, group_name, team_id) DO UPDATE SET
        played = EXCLUDED.played,
        won = EXCLUDED.won,
        drawn = EXCLUDED.drawn,
        lost = EXCLUDED.lost,
        goals_for = EXCLUDED.goals_for,
        goals_against = EXCLUDED.goals_against,
        goal_diff = EXCLUDED.goal_diff,
        pts = EXCLUDED.pts
    `)

    // 4. Tournament aggregates
    // The average is driven from matches with a LEFT JOIN so that goalless
    // matches still count in the denominator.
    await db.execute(sql`
      UPDATE tournaments t SET
        matches_count = (
          SELECT COUNT(*)::int FROM matches
          WHERE tournament_year = t.year AND is_quali_playoff = false
        ),
        total_goals = (
          SELECT COUNT(g.id)::int
          FROM goals g
          JOIN matches m ON g.match_id = m.id
          WHERE m.tournament_year = t.year AND m.is_quali_playoff = false
        ),
        avg_goals_per_game = (
          SELECT ROUND(COUNT(g.id)::numeric / NULLIF(COUNT(DISTINCT m.id), 0), 2)
          FROM matches m
          LEFT JOIN goals g ON g.match_id = m.id
          WHERE m.tournament_year = t.year AND m.is_quali_playoff = false
            AND m.score_ft_home IS NOT NULL
        )
      WHERE t.year < 2026
    `)

    console.log(`\n✅ Seed complete: ${totalMatches} matches, ${totalGoals} goals (1930–2022)`)
  } finally {
    // Release the connection pool on success, early return, and failure alike.
    await client.end()
  }
}

run().catch(e => {
  console.error('Seed failed:', e)
  process.exit(1)
})