import postgres from 'postgres'
import { drizzle } from 'drizzle-orm/postgres-js'
import { sql } from 'drizzle-orm'
import { fetchWikiHtml, scrapeYear, scrapeSquads } from '../lib/wiki-scraper'
import { getIso } from '../lib/iso-codes'

const DATABASE_URL = process.env.DATABASE_URL
if (!DATABASE_URL) {
  console.error('ERROR: DATABASE_URL environment variable is not set')
  process.exit(1)
}

/** One goal row as written to the `goals` table by `replaceGoals`. */
type GoalRow = {
  teamId: number
  name: string
  minute: number | null
  offset: number
  penalty: boolean
  owngoal: boolean
}

/**
 * Syncs the 2026 tournament from Wikipedia into the database.
 *
 * Steps, in order: optional `--force` wipe of 2026 rows, tournament row,
 * stadiums, matches + goals, squads, final placings, group standings and
 * tournament aggregates. The database client is closed when the function
 * finishes, whether it succeeds or throws.
 */
async function run() {
  // The top-level guard above exits the process when DATABASE_URL is unset,
  // so the non-null assertion is safe by the time run() is invoked.
  const client = postgres(DATABASE_URL!, { max: 2 })
  const db = drizzle(client)

  // ── DB helpers ─────────────────────────────────────────────────────────────

  // Raw team name -> teams.id, so each team is upserted at most once per run.
  const teamCache = new Map<string, number>()

  /**
   * Inserts the team if missing and returns its id.
   * On a name conflict an existing iso2 is kept when the new lookup yields none.
   */
  async function upsertTeam(rawName: string): Promise<number> {
    const cached = teamCache.get(rawName)
    if (cached !== undefined) return cached

    const iso2 = getIso(rawName)
    const [row] = await db.execute(sql`
      INSERT INTO teams (name, iso2)
      VALUES (${rawName}, ${iso2 ?? null})
      ON CONFLICT (name) DO UPDATE SET iso2 = COALESCE(EXCLUDED.iso2, teams.iso2)
      RETURNING id
    `)
    const id = (row as { id: number }).id
    teamCache.set(rawName, id)
    return id
  }

  /**
   * Inserts or updates one match and returns its id.
   *
   * `ft`, `et` and `p` are [home, away] pairs for full time, extra time and
   * penalties; a missing pair is stored as NULL. On conflict the round is
   * overwritten, while time and scores only change when the new value is
   * non-NULL (COALESCE keeps what is already stored otherwise).
   */
  async function upsertMatch(
    year: number,
    round: string,
    group: string | null,
    dateStr: string | null,
    timeStr: string | null,
    team1Id: number,
    team2Id: number,
    ft: [number, number] | undefined,
    et: [number, number] | undefined,
    p: [number, number] | undefined,
    isQuali: boolean,
  ): Promise<number> {
    const rows = await db.execute(sql`
      INSERT INTO matches
        (tournament_year, round, group_name, date, time_local,
         team1_id, team2_id,
         score_ft_home, score_ft_away,
         score_et_home, score_et_away,
         score_p_home, score_p_away,
         is_quali_playoff)
      VALUES (
        ${year}, ${round}, ${group}, ${dateStr}, ${timeStr},
        ${team1Id}, ${team2Id},
        ${ft?.[0] ?? null}, ${ft?.[1] ?? null},
        ${et?.[0] ?? null}, ${et?.[1] ?? null},
        ${p?.[0] ?? null}, ${p?.[1] ?? null},
        ${isQuali}
      )
      ON CONFLICT (tournament_year, team1_id, team2_id, date, is_quali_playoff)
      DO UPDATE SET
        round = EXCLUDED.round,
        time_local = COALESCE(EXCLUDED.time_local, matches.time_local),
        score_ft_home = COALESCE(EXCLUDED.score_ft_home, matches.score_ft_home),
        score_ft_away = COALESCE(EXCLUDED.score_ft_away, matches.score_ft_away),
        score_et_home = COALESCE(EXCLUDED.score_et_home, matches.score_et_home),
        score_et_away = COALESCE(EXCLUDED.score_et_away, matches.score_et_away),
        score_p_home = COALESCE(EXCLUDED.score_p_home, matches.score_p_home),
        score_p_away = COALESCE(EXCLUDED.score_p_away, matches.score_p_away)
      RETURNING id
    `)
    return (rows[0] as { id: number }).id
  }

  /**
   * Replaces every goal of a match inside one transaction: deletes the
   * existing rows, then inserts `goals` with a single multi-row INSERT.
   * An empty `goals` array leaves the match with no goal rows.
   */
  async function replaceGoals(matchId: number, goals: Array<GoalRow>) {
    await db.transaction(async tx => {
      await tx.execute(sql`DELETE FROM goals WHERE match_id = ${matchId}`)
      if (goals.length > 0) {
        const vals = goals.map(g =>
          sql`(${matchId}, ${g.teamId}, ${g.name}, ${g.minute}, ${g.offset}, ${g.penalty}, ${g.owngoal})`
        )
        await tx.execute(sql`
          INSERT INTO goals (match_id, team_id, player_name, minute, minute_offset, is_penalty, is_own_goal)
          VALUES ${sql.join(vals, sql`, `)}
        `)
      }
    })
  }

  // ── Incremental group detection ────────────────────────────────────────────

  /**
   * Returns the names of 2026 groups where every known match already has a
   * FT score — no need to re-fetch their sub-page.
   */
  async function getCompletedGroups(): Promise<Set<string>> {
    const rows = await db.execute(sql`
      SELECT group_name
      FROM matches
      WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false
      GROUP BY group_name
      HAVING COUNT(*) > 0
        AND COUNT(*) = SUM(CASE WHEN score_ft_home IS NOT NULL THEN 1 ELSE 0 END)
    `)
    return new Set(rows.map(r => (r as { group_name: string }).group_name))
  }

  try {
    // ── Sync 2026 from Wikipedia ───────────────────────────────────────────────

    const force = process.argv.includes('--force')
    if (force) {
      console.log('--force: clearing 2026 data...')
      // Goals, squads, standings and stadiums are removed before the matches themselves.
      await db.execute(sql`DELETE FROM goals WHERE match_id IN (SELECT id FROM matches WHERE tournament_year = 2026)`)
      await db.execute(sql`DELETE FROM squads WHERE tournament_year = 2026`)
      await db.execute(sql`DELETE FROM group_standings WHERE tournament_year = 2026`)
      await db.execute(sql`DELETE FROM stadiums WHERE tournament_year = 2026`)
      await db.execute(sql`DELETE FROM matches WHERE tournament_year = 2026`)
      // Remove orphaned teams that only appeared in 2026 data
      await db.execute(sql`DELETE FROM teams WHERE id NOT IN (SELECT team1_id FROM matches UNION SELECT team2_id FROM matches)`)
    }

    console.log('\nSyncing 2026 from Wikipedia...')

    await db.execute(sql`
      INSERT INTO tournaments (year, host)
      VALUES (2026, 'USA / Canada / Mexico')
      ON CONFLICT (year) DO NOTHING
    `)

    const mainHtml = await fetchWikiHtml('2026_FIFA_World_Cup')
    if (!mainHtml) {
      console.error(' FAILED to fetch 2026 Wikipedia page')
      // process.exit() does not run the finally block, so close explicitly first.
      await client.end()
      process.exit(1)
    }

    const completedGroups = await getCompletedGroups()
    if (completedGroups.size > 0)
      console.log(` Skipping completed groups: ${[...completedGroups].sort().join(', ')}`)

    process.stdout.write(' ')
    const { matches, stadiums, meta } = await scrapeYear(2026, mainHtml, { skipGroups: completedGroups })
    console.log()

    // Stadiums
    for (const s of stadiums.values()) {
      await db.execute(sql`
        INSERT INTO stadiums (tournament_year, name, city)
        VALUES (2026, ${s.name}, ${s.city ?? null})
        ON CONFLICT DO NOTHING
      `)
    }

    // Matches + goals
    let matchCount = 0, goalCount = 0
    for (const m of matches) {
      const t1Id = await upsertTeam(m.team1)
      const t2Id = await upsertTeam(m.team2)
      const matchId = await upsertMatch(
        2026, m.round, m.group ?? null, m.date ?? null, m.time ?? null,
        t1Id, t2Id,
        m.score?.ft, m.score?.et, m.score?.p,
        false,
      )

      // An own goal listed under one side is stored with the opposing team's id.
      const goals = [
        ...(m.goals1 ?? []).map(g => ({
          teamId: g.owngoal ? t2Id : t1Id,
          name: g.name,
          minute: g.minute ?? null,
          offset: g.offset ?? 0,
          penalty: g.penalty ?? false,
          owngoal: g.owngoal ?? false,
        })),
        ...(m.goals2 ?? []).map(g => ({
          teamId: g.owngoal ? t1Id : t2Id,
          name: g.name,
          minute: g.minute ?? null,
          offset: g.offset ?? 0,
          penalty: g.penalty ?? false,
          owngoal: g.owngoal ?? false,
        })),
      ]
      // Skipped when nothing was scraped, so stored goals for the match are left untouched.
      if (goals.length > 0) await replaceGoals(matchId, goals)
      matchCount++
      goalCount += goals.length
    }

    // Squads (fetch once; idempotent upsert so safe to re-run)
    const squadHtml = await fetchWikiHtml('2026_FIFA_World_Cup_squads')
    if (squadHtml) {
      const squads = scrapeSquads(squadHtml)
      for (const sq of squads) {
        const teamId = await upsertTeam(sq.name)
        for (const p of sq.players) {
          // Strip all whitespace from the scraped date of birth before storing it.
          const dob = p.date_of_birth ? p.date_of_birth.replace(/\s/g, '') : null
          await db.execute(sql`
            INSERT INTO squads (tournament_year, team_id, player_name, shirt_number, position, date_of_birth)
            VALUES (2026, ${teamId}, ${p.name}, ${p.number ?? null}, ${p.pos ?? null}, ${dob})
            ON CONFLICT (tournament_year, team_id, shirt_number) DO UPDATE SET
              player_name = EXCLUDED.player_name,
              position = EXCLUDED.position,
              date_of_birth = EXCLUDED.date_of_birth
          `)
        }
      }
      console.log(` Squads: ${squads.length} teams`)
    }

    // Tournament winner (once the final is played)
    if (meta.winner) {
      await db.execute(sql`
        UPDATE tournaments SET
          winner = ${meta.winner},
          runner_up = ${meta.runner_up},
          third_place = ${meta.third_place},
          fourth_place = ${meta.fourth_place}
        WHERE year = 2026
      `)
    }

    // Group standings: each scored group match contributes one row per team
    // (home and away perspective), then the rows are aggregated per team.
    await db.execute(sql`
      WITH match_results AS (
        SELECT tournament_year, group_name, team1_id AS team_id, score_ft_home AS gf, score_ft_away AS ga
        FROM matches
        WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false AND score_ft_home IS NOT NULL
        UNION ALL
        SELECT tournament_year, group_name, team2_id, score_ft_away, score_ft_home
        FROM matches
        WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false AND score_ft_home IS NOT NULL
      )
      INSERT INTO group_standings
        (tournament_year, group_name, team_id, played, won, drawn, lost, goals_for, goals_against, goal_diff, pts)
      SELECT
        tournament_year, group_name, team_id,
        COUNT(*)::int,
        SUM(CASE WHEN gf > ga THEN 1 ELSE 0 END)::int,
        SUM(CASE WHEN gf = ga THEN 1 ELSE 0 END)::int,
        SUM(CASE WHEN gf < ga THEN 1 ELSE 0 END)::int,
        SUM(gf)::int,
        SUM(ga)::int,
        SUM(gf - ga)::int,
        SUM(CASE WHEN gf > ga THEN 3 WHEN gf = ga THEN 1 ELSE 0 END)::int
      FROM match_results
      GROUP BY tournament_year, group_name, team_id
      ON CONFLICT (tournament_year, group_name, team_id) DO UPDATE SET
        played = EXCLUDED.played,
        won = EXCLUDED.won,
        drawn = EXCLUDED.drawn,
        lost = EXCLUDED.lost,
        goals_for = EXCLUDED.goals_for,
        goals_against = EXCLUDED.goals_against,
        goal_diff = EXCLUDED.goal_diff,
        pts = EXCLUDED.pts
    `)

    // Ensure every team that appears in a group match has a standings row (0-0-0-0 for unplayed teams)
    await db.execute(sql`
      INSERT INTO group_standings
        (tournament_year, group_name, team_id, played, won, drawn, lost, goals_for, goals_against, goal_diff, pts)
      SELECT DISTINCT 2026, group_name, team1_id, 0, 0, 0, 0, 0, 0, 0, 0
      FROM matches
      WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false
      UNION
      SELECT DISTINCT 2026, group_name, team2_id, 0, 0, 0, 0, 0, 0, 0, 0
      FROM matches
      WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false
      ON CONFLICT (tournament_year, group_name, team_id) DO NOTHING
    `)

    // Tournament aggregates (goal totals and average count only matches with a FT score)
    await db.execute(sql`
      UPDATE tournaments SET
        matches_count = (SELECT COUNT(*)::int FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false),
        total_goals = (SELECT COALESCE(SUM(score_ft_home + score_ft_away), 0)::int FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false AND score_ft_home IS NOT NULL),
        avg_goals_per_game = (
          SELECT ROUND(COALESCE(SUM(score_ft_home + score_ft_away), 0)::numeric / NULLIF(COUNT(*), 0), 2)
          FROM matches
          WHERE tournament_year = 2026 AND is_quali_playoff = false AND score_ft_home IS NOT NULL
        )
      WHERE year = 2026
    `)

    console.log(` ✓ ${matchCount} matches, ${goalCount} goals`)
    console.log('\n✅ Sync complete!')
  } finally {
    // Close the connection on success and on failure alike.
    await client.end()
  }
}

run().catch(e => {
  console.error(e)
  process.exit(1)
})