// worldcup/scripts/sync.ts — TypeScript, 236 lines, 9.6 KiB

import postgres from 'postgres'
import { drizzle } from 'drizzle-orm/postgres-js'
import { sql } from 'drizzle-orm'
import { fetchWikiHtml, scrapeYear, scrapeSquads } from '../lib/wiki-scraper'
import { getIso } from '../lib/iso-codes'
// Postgres connection string. The script aborts with exit code 1 when it is missing or empty.
const DATABASE_URL = process.env.DATABASE_URL
if (DATABASE_URL === undefined || DATABASE_URL === '') {
  console.error('ERROR: DATABASE_URL environment variable is not set')
  process.exit(1)
}
// ── DB helpers ─────────────────────────────────────────────────────────────
/**
 * Entry point of the sync script.
 *
 * Scrapes the 2026 FIFA World Cup pages from Wikipedia and writes the result to
 * Postgres: the tournament row, stadiums, teams, matches, goals, squads, final
 * placements, group standings and tournament aggregates. The database client is
 * closed before the function returns normally; a thrown error propagates to the
 * caller as a rejected promise.
 */
async function run() {
// DATABASE_URL is asserted non-null here because the module-level guard exits
// the process when it is unset. `max: 2` is the postgres.js pool-size option.
const client = postgres(DATABASE_URL!, { max: 2 })
const db = drizzle(client)
// Team name -> teams.id, filled by upsertTeam so repeated names skip the database.
const teamCache = new Map<string, number>()
/**
 * Returns the `teams.id` for `rawName`, inserting the team when it is new.
 *
 * Resolved ids are memoised in `teamCache`, so a name that was already looked up
 * during this run is answered without a query. When the team already exists, its
 * stored iso2 is only replaced if `getIso` knows a code for the name (COALESCE
 * keeps the stored value otherwise).
 *
 * @param rawName Team name exactly as scraped; also the cache key.
 * @returns The numeric id of the team row.
 */
async function upsertTeam(rawName: string) {
  const knownId = teamCache.get(rawName)
  if (knownId !== undefined) return knownId

  const isoCode = getIso(rawName) ?? null
  const result = await db.execute(sql`
INSERT INTO teams (name, iso2)
VALUES (${rawName}, ${isoCode})
ON CONFLICT (name) DO UPDATE SET iso2 = COALESCE(EXCLUDED.iso2, teams.iso2)
RETURNING id
`)
  const { id } = result[0] as { id: number }
  teamCache.set(rawName, id)
  return id
}
/**
 * Inserts a match, or refreshes the existing row that has the same
 * (tournament_year, team1_id, team2_id, date, is_quali_playoff).
 *
 * On conflict `round` is overwritten, while `time_local` and all score columns
 * are only overwritten by non-null incoming values; `group_name` is left as stored.
 *
 * NOTE(review): Postgres treats NULLs in a unique index as distinct by default,
 * so a match with a null `dateStr` would presumably never hit the conflict
 * branch and would be inserted again on each run — confirm against the schema.
 *
 * @param ft Full-time score as [home, away], if known.
 * @param et Extra-time score as [home, away], if known.
 * @param p  Penalty shoot-out score as [home, away], if known.
 * @returns The id of the inserted or updated match row.
 */
async function upsertMatch(
  year: number, round: string, group: string | null, dateStr: string | null,
  timeStr: string | null, team1Id: number, team2Id: number,
  ft: [number, number] | undefined, et: [number, number] | undefined, p: [number, number] | undefined,
  isQuali: boolean,
) {
  // Flatten each optional [home, away] pair into two nullable column values.
  const ftHome = ft?.[0] ?? null
  const ftAway = ft?.[1] ?? null
  const etHome = et?.[0] ?? null
  const etAway = et?.[1] ?? null
  const pHome = p?.[0] ?? null
  const pAway = p?.[1] ?? null

  const upserted = await db.execute(sql`
INSERT INTO matches (tournament_year, round, group_name, date, time_local, team1_id, team2_id,
score_ft_home, score_ft_away, score_et_home, score_et_away,
score_p_home, score_p_away, is_quali_playoff)
VALUES (
${year}, ${round}, ${group}, ${dateStr}, ${timeStr}, ${team1Id}, ${team2Id},
${ftHome}, ${ftAway},
${etHome}, ${etAway},
${pHome}, ${pAway},
${isQuali}
)
ON CONFLICT (tournament_year, team1_id, team2_id, date, is_quali_playoff) DO UPDATE SET
round = EXCLUDED.round,
time_local = COALESCE(EXCLUDED.time_local, matches.time_local),
score_ft_home = COALESCE(EXCLUDED.score_ft_home, matches.score_ft_home),
score_ft_away = COALESCE(EXCLUDED.score_ft_away, matches.score_ft_away),
score_et_home = COALESCE(EXCLUDED.score_et_home, matches.score_et_home),
score_et_away = COALESCE(EXCLUDED.score_et_away, matches.score_et_away),
score_p_home = COALESCE(EXCLUDED.score_p_home, matches.score_p_home),
score_p_away = COALESCE(EXCLUDED.score_p_away, matches.score_p_away)
RETURNING id
`)
  const { id } = upserted[0] as { id: number }
  return id
}
/**
 * Replaces the stored goals of one match with `goals`.
 *
 * The delete and the (single, multi-row) insert run inside one transaction.
 * With an empty `goals` array only the delete is executed.
 *
 * @param matchId Id of the match whose goals are rewritten.
 * @param goals   Goal rows to store; `offset` is written to `minute_offset`.
 */
async function replaceGoals(matchId: number, goals: Array<{
  teamId: number; name: string; minute: number | null; offset: number; penalty: boolean; owngoal: boolean
}>) {
  await db.transaction(async tx => {
    await tx.execute(sql`DELETE FROM goals WHERE match_id = ${matchId}`)
    if (goals.length === 0) return

    // One parenthesised value tuple per goal, joined into a single INSERT.
    const tuples = goals.map(({ teamId, name, minute, offset, penalty, owngoal }) =>
      sql`(${matchId}, ${teamId}, ${name}, ${minute}, ${offset}, ${penalty}, ${owngoal})`
    )
    await tx.execute(sql`
INSERT INTO goals (match_id, team_id, player_name, minute, minute_offset, is_penalty, is_own_goal)
VALUES ${sql.join(tuples, sql`, `)}
`)
  })
}
// ── Incremental group detection ────────────────────────────────────────────
/**
 * Names of the 2026 groups in which every stored, non-playoff match already has
 * a full-time score. Such groups need no re-fetch of their sub-page, so the
 * result is handed to the scraper as the set of groups to skip.
 */
async function getCompletedGroups(): Promise<Set<string>> {
  const finished = await db.execute(sql`
SELECT group_name
FROM matches
WHERE tournament_year = 2026
AND group_name IS NOT NULL
AND is_quali_playoff = false
GROUP BY group_name
HAVING COUNT(*) > 0
AND COUNT(*) = SUM(CASE WHEN score_ft_home IS NOT NULL THEN 1 ELSE 0 END)
`)
  const names = new Set<string>()
  for (const row of finished) {
    names.add((row as { group_name: string }).group_name)
  }
  return names
}
// ── Sync 2026 from Wikipedia ───────────────────────────────────────────────
console.log('\nSyncing 2026 from Wikipedia...')

// Make sure the tournament row exists; an already present row is left untouched.
const ensureTournament = sql`
INSERT INTO tournaments (year, host)
VALUES (2026, 'USA / Canada / Mexico')
ON CONFLICT (year) DO NOTHING
`
await db.execute(ensureTournament)

// Without the main article there is nothing to sync: close the client and abort.
const mainHtml = await fetchWikiHtml('2026_FIFA_World_Cup')
if (!mainHtml) {
  console.error(' FAILED to fetch 2026 Wikipedia page')
  await client.end()
  process.exit(1)
}

const completedGroups = await getCompletedGroups()
if (completedGroups.size > 0) {
  const skipped = Array.from(completedGroups).sort().join(', ')
  console.log(` Skipping completed groups: ${skipped}`)
}

// Leading space, then a newline once scraping is done — presumably scrapeYear
// prints its progress on the same line (not visible from here).
process.stdout.write(' ')
const { matches, stadiums, meta } = await scrapeYear(2026, mainHtml, { skipGroups: completedGroups })
console.log()
// Stadiums: insert every scraped venue; a row that conflicts with an existing one is skipped.
for (const venue of stadiums.values()) {
  const city = venue.city ?? null
  await db.execute(sql`
INSERT INTO stadiums (tournament_year, name, city)
VALUES (2026, ${venue.name}, ${city})
ON CONFLICT DO NOTHING
`)
}
// Matches + goals: upsert both teams and the match, then rewrite the match's goals.
let matchCount = 0
let goalCount = 0
for (const m of matches) {
  const t1Id = await upsertTeam(m.team1)
  const t2Id = await upsertTeam(m.team2)
  const matchId = await upsertMatch(
    2026, m.round, m.group ?? null, m.date ?? null, m.time ?? null,
    t1Id, t2Id, m.score?.ft, m.score?.et, m.score?.p, false,
  )

  // Goals are listed per side. A goal flagged as an own goal is stored with the
  // id of the opposing team instead of the side it is listed under.
  const sides = [
    { scored: m.goals1 ?? [], listedUnder: t1Id, opponent: t2Id },
    { scored: m.goals2 ?? [], listedUnder: t2Id, opponent: t1Id },
  ]
  const goals = sides.flatMap(({ scored, listedUnder, opponent }) =>
    scored.map(g => ({
      teamId: g.owngoal ? opponent : listedUnder,
      name: g.name,
      minute: g.minute ?? null,
      offset: g.offset ?? 0,
      penalty: g.penalty ?? false,
      owngoal: g.owngoal ?? false,
    })),
  )

  // Only rewrite goals when the scrape produced some; otherwise stored goals stay as they are.
  if (goals.length > 0) await replaceGoals(matchId, goals)
  matchCount += 1
  goalCount += goals.length
}
// Squads: fetched once per run and upserted per (tournament_year, team_id, shirt_number).
// A failed fetch silently skips this section.
// NOTE(review): Postgres treats NULLs in a unique index as distinct by default, so
// players without a shirt number would presumably be inserted again on every run —
// confirm against the schema before relying on re-runs being idempotent.
const squadHtml = await fetchWikiHtml('2026_FIFA_World_Cup_squads')
if (squadHtml) {
  const squads = scrapeSquads(squadHtml)
  for (const squad of squads) {
    const teamId = await upsertTeam(squad.name)
    for (const player of squad.players) {
      // All whitespace is stripped from the scraped date of birth before it is stored.
      const birthDate = player.date_of_birth
        ? player.date_of_birth.replace(/\s/g, '')
        : null
      const shirtNumber = player.number ?? null
      const position = player.pos ?? null
      await db.execute(sql`
INSERT INTO squads (tournament_year, team_id, player_name, shirt_number, position, date_of_birth)
VALUES (2026, ${teamId}, ${player.name}, ${shirtNumber}, ${position}, ${birthDate})
ON CONFLICT (tournament_year, team_id, shirt_number) DO UPDATE SET
player_name = EXCLUDED.player_name,
position = EXCLUDED.position,
date_of_birth = EXCLUDED.date_of_birth
`)
    }
  }
  console.log(` Squads: ${squads.length} teams`)
}
// Final placements: written only once the scraper reports a winner.
if (meta.winner) {
  // A placement the scraper left undefined must reach the query as SQL NULL;
  // an undefined interpolation is not a usable bind value. This follows the
  // `?? null` convention the stadium and squad inserts in this script use.
  const runnerUp = meta.runner_up ?? null
  const thirdPlace = meta.third_place ?? null
  const fourthPlace = meta.fourth_place ?? null
  await db.execute(sql`
UPDATE tournaments SET
winner = ${meta.winner},
runner_up = ${runnerUp},
third_place = ${thirdPlace},
fourth_place = ${fourthPlace}
WHERE year = 2026
`)
}
// Group standings: recomputed from scratch out of all 2026 group matches that have
// a full-time score. Each match contributes one row per team (home view and away
// view); the rows are aggregated per team and upserted. Win = 3 pts, draw = 1 pt.
const standingsUpsert = sql`
WITH match_results AS (
SELECT tournament_year, group_name, team1_id AS team_id, score_ft_home AS gf, score_ft_away AS ga
FROM matches WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false AND score_ft_home IS NOT NULL
UNION ALL
SELECT tournament_year, group_name, team2_id, score_ft_away, score_ft_home
FROM matches WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false AND score_ft_home IS NOT NULL
)
INSERT INTO group_standings (tournament_year, group_name, team_id, played, won, drawn, lost, goals_for, goals_against, goal_diff, pts)
SELECT tournament_year, group_name, team_id,
COUNT(*)::int,
SUM(CASE WHEN gf > ga THEN 1 ELSE 0 END)::int,
SUM(CASE WHEN gf = ga THEN 1 ELSE 0 END)::int,
SUM(CASE WHEN gf < ga THEN 1 ELSE 0 END)::int,
SUM(gf)::int, SUM(ga)::int, SUM(gf - ga)::int,
SUM(CASE WHEN gf > ga THEN 3 WHEN gf = ga THEN 1 ELSE 0 END)::int
FROM match_results
GROUP BY tournament_year, group_name, team_id
ON CONFLICT (tournament_year, group_name, team_id) DO UPDATE SET
played = EXCLUDED.played, won = EXCLUDED.won, drawn = EXCLUDED.drawn,
lost = EXCLUDED.lost, goals_for = EXCLUDED.goals_for, goals_against = EXCLUDED.goals_against,
goal_diff = EXCLUDED.goal_diff, pts = EXCLUDED.pts
`
await db.execute(standingsUpsert)
// Tournament aggregates for 2026, playoff qualifiers excluded:
//   matches_count      – all stored matches
//   total_goals        – sum of full-time goals over matches that have a score
//   avg_goals_per_game – that sum divided by the number of scored matches, rounded
//                        to 2 decimals (NULL while no match has a score)
// NOTE(review): only the score_ft_* columns are summed; if score_et_* holds the
// score after extra time, goals scored in extra time are not counted — confirm intended.
const aggregatesUpdate = sql`
UPDATE tournaments SET
matches_count = (SELECT COUNT(*)::int FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false),
total_goals = (SELECT COALESCE(SUM(score_ft_home + score_ft_away), 0)::int FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false AND score_ft_home IS NOT NULL),
avg_goals_per_game = (
SELECT ROUND(COALESCE(SUM(score_ft_home + score_ft_away), 0)::numeric / NULLIF(COUNT(*), 0), 2)
FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false AND score_ft_home IS NOT NULL
)
WHERE year = 2026
`
await db.execute(aggregatesUpdate)

// Summary of what this run touched, then release the database connections.
console.log(`${matchCount} matches, ${goalCount} goals`)
console.log('\n✅ Sync complete!')
await client.end()
}
// Start the sync; any failure is logged and turned into exit code 1.
run().catch((err: unknown) => {
  console.error(err)
  process.exit(1)
})