feat: replace historical sync with Kaggle seed for complete 1930-2022 goal data
- scripts/seed.ts: one-time import of Kaggle FIFA dataset (matches_1930_2022.csv, world_cup.csv) covering all 964 matches and 2720 goals from 1930-2022 with full scorer names, minutes, penalties, and own goals for every tournament - scripts/sync.ts: stripped to 2026 only (openfootball live data); historical years removed since Kaggle is now authoritative for 1930-2022 - Dockerfile: copy app/data into runner image; CMD runs seed.ts before server.js so a fresh deployment auto-seeds on first start (skips if already seeded) - package.json: add 'seed' script; use --force to re-import from updated CSV files - app/data/kaggle/: bundle Kaggle CSV files in repo Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+382
@@ -0,0 +1,382 @@
|
||||
import postgres from 'postgres'
|
||||
import { drizzle } from 'drizzle-orm/postgres-js'
|
||||
import { sql } from 'drizzle-orm'
|
||||
import { readFileSync } from 'fs'
|
||||
import path from 'path'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { getIso } from '../lib/iso-codes'
|
||||
|
||||
// Postgres connection string; when DATABASE_URL is unset, a localhost database is used.
const DATABASE_URL = process.env.DATABASE_URL ?? 'postgres://wc:wc@localhost:5432/worldcup'

// Directory containing this module, derived from the module URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url))

// Location of the Kaggle CSV files, resolved relative to this script's directory.
const DATA_DIR = path.join(__dirname, '..', 'app', 'data', 'kaggle')
|
||||
|
||||
// Third/fourth place per tournament year — not present in Kaggle world_cup.csv,
// so they are maintained by hand here and merged into the tournaments rows.
const PLACEMENTS: Record<number, { third?: string; fourth?: string }> = {
  1930: { third: 'USA', fourth: 'Yugoslavia' },
  1934: { third: 'Germany', fourth: 'Austria' },
  1938: { third: 'Brazil', fourth: 'Sweden' },
  // 1950 had no third-place play-off; positions are the final-round group standings.
  1950: { third: 'Sweden', fourth: 'Spain' },
  1954: { third: 'Austria', fourth: 'Uruguay' },
  1958: { third: 'France', fourth: 'Germany' },
  1962: { third: 'Chile', fourth: 'Yugoslavia' },
  1966: { third: 'Portugal', fourth: 'Soviet Union' },
  1970: { third: 'Germany', fourth: 'Uruguay' },
  1974: { third: 'Poland', fourth: 'Brazil' },
  1978: { third: 'Brazil', fourth: 'Italy' },
  1982: { third: 'Poland', fourth: 'France' },
  1986: { third: 'France', fourth: 'Belgium' },
  1990: { third: 'Italy', fourth: 'England' },
  1994: { third: 'Sweden', fourth: 'Bulgaria' },
  1998: { third: 'Croatia', fourth: 'Netherlands' },
  2002: { third: 'Turkey', fourth: 'South Korea' },
  2006: { third: 'Germany', fourth: 'Portugal' },
  2010: { third: 'Germany', fourth: 'Uruguay' },
  2014: { third: 'Netherlands', fourth: 'Brazil' },
  2018: { third: 'Belgium', fourth: 'England' },
  2022: { third: 'Croatia', fourth: 'Morocco' },
}
|
||||
|
||||
// Normalize Kaggle team names to match openfootball / our canonical names
const TEAM_ALIASES: Record<string, string> = {
  'West Germany': 'Germany',
  'Korea Republic': 'South Korea',
  'IR Iran': 'Iran',
}

/**
 * Maps a raw team name to its canonical form.
 *
 * @param name Team name as it appears in the source data.
 * @returns The alias target when `name` is a key of TEAM_ALIASES, otherwise `name` unchanged.
 */
function normTeam(name: string): string {
  // Own-property check: a bare index lookup would also match inherited keys such as
  // "constructor" or "toString" and hand back a function instead of a team name.
  const alias = Object.prototype.hasOwnProperty.call(TEAM_ALIASES, name)
    ? TEAM_ALIASES[name]
    : undefined
  return alias ?? name
}
|
||||
|
||||
/**
 * Minimal RFC-4180-style CSV parser — no external dependency needed.
 *
 * Handles quoted fields, doubled quotes (`""`) inside quoted fields, commas and
 * newlines inside quotes, and both LF and CRLF line endings (carriage returns
 * outside quotes are dropped). Header names and cell values are trimmed.
 *
 * Rows whose cells are all blank are ignored, including blank lines that precede
 * the header, so the first non-blank row is always used as the header.
 *
 * @param content Full CSV text.
 * @returns One object per data row keyed by header name; cells missing from a short
 *          row become `''`, cells beyond the header width are discarded. Returns `[]`
 *          when the input has no non-blank rows.
 */
function parseCsv(content: string): Record<string, string>[] {
  const rows: string[][] = []
  let row: string[] = []
  let field = ''
  let inQ = false

  for (let i = 0; i < content.length; i++) {
    const ch = content[i]
    if (inQ) {
      if (ch !== '"') {
        field += ch
      } else if (content[i + 1] === '"') {
        // Doubled quote inside a quoted field is a literal quote character.
        field += '"'
        i++
      } else {
        inQ = false
      }
    } else if (ch === '"') {
      inQ = true
    } else if (ch === ',') {
      row.push(field)
      field = ''
    } else if (ch === '\n') {
      row.push(field)
      rows.push(row)
      row = []
      field = ''
    } else if (ch !== '\r') {
      field += ch
    }
  }
  // Flush the final record when the text does not end with a newline.
  if (field || row.length) {
    row.push(field)
    rows.push(row)
  }

  // Drop blank rows BEFORE choosing the header, so a leading empty line cannot
  // become the header and silently mis-key every record.
  const [headerRow, ...dataRows] = rows.filter(r => r.some(f => f.trim()))
  if (!headerRow) return []

  const headers = headerRow.map(h => h.trim())
  return dataRows.map(r =>
    Object.fromEntries(headers.map((h, i): [string, string] => [h, (r[i] ?? '').trim()])),
  )
}
|
||||
|
||||
/** One parsed goal: scorer, base minute (null when unknown), stoppage-time offset and flags. */
type GoalEntry = { name: string; minute: number | null; offset: number; isPenalty: boolean; isOwnGoal: boolean }

/**
 * Parses a single goal entry such as "Player Name · 57" or "Player (OG) · 90+3".
 *
 * The text before the last "·" is the scorer (a trailing "(P)" / "(OG)" marker is
 * stripped); the text after it is the minute, optionally followed by "+offset".
 *
 * @param entry     Raw entry text.
 * @param isPenalty Flag copied onto the result.
 * @param isOwnGoal Flag copied onto the result.
 * @returns The parsed goal, or null when there is no "·" separator or no scorer name.
 *          `minute` is null whenever the base minute is not numeric (never NaN);
 *          `offset` is 0 when absent or not numeric.
 */
function parseGoalStr(entry: string, isPenalty = false, isOwnGoal = false): GoalEntry | null {
  const dot = entry.lastIndexOf('·')
  if (dot === -1) return null

  const name = entry
    .slice(0, dot)
    .trim()
    .replace(/\s*\(P\)\s*$/, '')
    .replace(/\s*\(OG\)\s*$/, '')
    .trim()
  if (!name) return null

  const minRaw = entry.slice(dot + 1).trim()
  const plusIdx = minRaw.indexOf('+')
  const baseRaw = plusIdx === -1 ? minRaw : minRaw.slice(0, plusIdx)

  // Normalise an unparseable base minute to null in BOTH forms ("x" and "x+3"),
  // so NaN can never reach an INTEGER column.
  const base = Number.parseInt(baseRaw, 10)
  const minute = Number.isNaN(base) ? null : base

  const offset = plusIdx === -1 ? 0 : Number.parseInt(minRaw.slice(plusIdx + 1), 10) || 0

  return { name, minute, offset, isPenalty, isOwnGoal }
}
|
||||
|
||||
/**
 * Parses a goal column whose entries are separated by "|".
 *
 * Each piece is trimmed and handed to parseGoalStr together with the two flags;
 * pieces that parseGoalStr rejects (null) are left out of the result.
 *
 * @returns The parsed goals in column order; an empty array for a blank or missing column.
 */
function parseGoalCol(col: string, isPenalty = false, isOwnGoal = false): GoalEntry[] {
  // Optional chaining keeps a column that is absent at runtime from throwing.
  const hasContent = Boolean(col?.trim())
  if (!hasContent) return []

  const parsed: GoalEntry[] = []
  for (const piece of col.split('|')) {
    const goal = parseGoalStr(piece.trim(), isPenalty, isOwnGoal)
    if (goal) parsed.push(goal)
  }
  return parsed
}
|
||||
|
||||
/** Converts a CSV cell to a base-10 integer; blank, missing or non-numeric cells become null. */
function toIntOrNull(raw: string | undefined): number | null {
  if (raw === undefined || raw.trim() === '') return null
  const n = Number.parseInt(raw, 10)
  return Number.isNaN(n) ? null : n
}

/**
 * One-time import of the Kaggle CSV files into Postgres.
 *
 * Steps:
 *  1. Create all tables and indexes if they do not exist.
 *  2. Unless `--force` / `-f` is passed, exit early when any tournament row with
 *     year < 2026 already exists.
 *  3. Inside a single transaction: with `--force`, delete goals, matches and
 *     tournaments for years < 2026; then upsert tournaments from world_cup.csv,
 *     upsert teams/matches and re-insert goals from matches_1930_2022.csv, and
 *     recompute per-tournament aggregates.
 *
 * The transaction makes the import all-or-nothing: tournaments are written before
 * matches, so without it a failure part-way through would leave tournament rows
 * behind and every later run would take the "already seeded" early exit on an
 * incomplete dataset.
 *
 * The database client is always closed, whether the import succeeds or fails.
 */
async function run(): Promise<void> {
  const client = postgres(DATABASE_URL, { max: 5 })
  const db = drizzle(client)

  try {
    // Create tables (mirrors sync.ts DDL — runs first on a fresh DB)
    await db.execute(sql`
      CREATE TABLE IF NOT EXISTS tournaments (
        year INTEGER PRIMARY KEY,
        host TEXT NOT NULL,
        winner TEXT,
        runner_up TEXT,
        third_place TEXT,
        fourth_place TEXT,
        teams_count INTEGER,
        matches_count INTEGER,
        total_goals INTEGER,
        avg_goals_per_game NUMERIC(4,2)
      );
      CREATE TABLE IF NOT EXISTS teams (
        id SERIAL PRIMARY KEY,
        name TEXT UNIQUE NOT NULL,
        iso2 TEXT,
        fifa_code TEXT,
        continent TEXT,
        confederation TEXT
      );
      CREATE TABLE IF NOT EXISTS stadiums (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER,
        name TEXT NOT NULL,
        city TEXT,
        country_code TEXT,
        capacity INTEGER,
        timezone TEXT,
        coordinates TEXT
      );
      CREATE TABLE IF NOT EXISTS matches (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER NOT NULL,
        round TEXT NOT NULL,
        group_name TEXT,
        date DATE,
        time_local TEXT,
        stadium_id INTEGER,
        team1_id INTEGER NOT NULL,
        team2_id INTEGER NOT NULL,
        score_ft_home INTEGER,
        score_ft_away INTEGER,
        score_ht_home INTEGER,
        score_ht_away INTEGER,
        score_et_home INTEGER,
        score_et_away INTEGER,
        score_p_home INTEGER,
        score_p_away INTEGER,
        is_quali_playoff BOOLEAN DEFAULT false
      );
      CREATE UNIQUE INDEX IF NOT EXISTS matches_unique
        ON matches (tournament_year, team1_id, team2_id, date, is_quali_playoff);
      CREATE TABLE IF NOT EXISTS goals (
        id SERIAL PRIMARY KEY,
        match_id INTEGER NOT NULL,
        team_id INTEGER NOT NULL,
        player_name TEXT NOT NULL,
        minute INTEGER,
        minute_offset INTEGER DEFAULT 0,
        is_penalty BOOLEAN DEFAULT false,
        is_own_goal BOOLEAN DEFAULT false
      );
      CREATE TABLE IF NOT EXISTS group_standings (
        tournament_year INTEGER NOT NULL,
        group_name TEXT NOT NULL,
        team_id INTEGER NOT NULL,
        pos INTEGER,
        played INTEGER DEFAULT 0,
        won INTEGER DEFAULT 0,
        drawn INTEGER DEFAULT 0,
        lost INTEGER DEFAULT 0,
        goals_for INTEGER DEFAULT 0,
        goals_against INTEGER DEFAULT 0,
        goal_diff INTEGER DEFAULT 0,
        pts INTEGER DEFAULT 0,
        PRIMARY KEY (tournament_year, group_name, team_id)
      );
      CREATE TABLE IF NOT EXISTS squads (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER NOT NULL,
        team_id INTEGER NOT NULL,
        player_name TEXT NOT NULL,
        shirt_number INTEGER,
        position TEXT,
        date_of_birth DATE
      );
      CREATE UNIQUE INDEX IF NOT EXISTS squads_unique
        ON squads (tournament_year, team_id, shirt_number);
    `)

    const force = process.argv.includes('--force') || process.argv.includes('-f')

    // Skip if already seeded (idempotency check)
    if (!force) {
      const existing = await db.execute(sql`SELECT COUNT(*)::int AS cnt FROM tournaments WHERE year < 2026`)
      if ((existing[0] as { cnt: number }).cnt > 0) {
        console.log('✓ Already seeded (historical data present), skipping. Use --force to re-import.')
        return
      }
    }

    // Read and parse both files before touching any data, so a missing or unreadable
    // CSV aborts the run before anything is deleted.
    const wcRows = parseCsv(readFileSync(path.join(DATA_DIR, 'world_cup.csv'), 'utf-8'))
    const matchRows = parseCsv(readFileSync(path.join(DATA_DIR, 'matches_1930_2022.csv'), 'utf-8'))

    let totalMatches = 0
    let totalGoals = 0

    // Everything below is rolled back together if any statement fails.
    await db.transaction(async tx => {
      if (force) {
        console.log('--force: clearing historical data...')
        await tx.execute(sql`DELETE FROM goals WHERE match_id IN (SELECT id FROM matches WHERE tournament_year < 2026)`)
        await tx.execute(sql`DELETE FROM matches WHERE tournament_year < 2026`)
        await tx.execute(sql`DELETE FROM tournaments WHERE year < 2026`)
      }

      console.log('Seeding from Kaggle data (1930–2022)...')

      // Canonical team name → teams.id, so each team is upserted at most once per run.
      const teamCache = new Map<string, number>()

      async function upsertTeam(rawName: string): Promise<number> {
        const name = normTeam(rawName)
        const cached = teamCache.get(name)
        if (cached !== undefined) return cached
        // The no-op DO UPDATE makes RETURNING yield the id for existing rows as well.
        const [row] = await tx.execute(sql`
          INSERT INTO teams (name, iso2)
          VALUES (${name}, ${getIso(name) ?? null})
          ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name
          RETURNING id
        `)
        const id = (row as { id: number }).id
        teamCache.set(name, id)
        return id
      }

      // 1. Tournaments from world_cup.csv
      for (const r of wcRows) {
        const year = Number.parseInt(r['Year'], 10)
        if (Number.isNaN(year)) continue
        const winner = normTeam(r['Champion'] || '')
        const runnerUp = normTeam(r['Runner-Up'] || '')
        const p = PLACEMENTS[year] ?? {}
        await tx.execute(sql`
          INSERT INTO tournaments (year, host, winner, runner_up, third_place, fourth_place, teams_count, matches_count)
          VALUES (
            ${year}, ${r['Host']},
            ${winner || null}, ${runnerUp || null},
            ${p.third ?? null}, ${p.fourth ?? null},
            ${Number.parseInt(r['Teams'], 10) || null}, ${Number.parseInt(r['Matches'], 10) || null}
          )
          ON CONFLICT (year) DO UPDATE SET
            host = EXCLUDED.host,
            winner = EXCLUDED.winner,
            runner_up = EXCLUDED.runner_up,
            third_place = EXCLUDED.third_place,
            fourth_place = EXCLUDED.fourth_place,
            teams_count = EXCLUDED.teams_count,
            matches_count = EXCLUDED.matches_count
        `)
      }

      // 2. Matches + goals from matches_1930_2022.csv
      for (const r of matchRows) {
        const year = Number.parseInt(r['Year'], 10)
        if (Number.isNaN(year)) continue

        const t1Id = await upsertTeam(r['home_team'])
        const t2Id = await upsertTeam(r['away_team'])

        // Blank or non-numeric cells become NULL instead of NaN.
        const homeScore = toIntOrNull(r['home_score'])
        const awayScore = toIntOrNull(r['away_score'])
        const homePen = toIntOrNull(r['home_penalty'])
        const awayPen = toIntOrNull(r['away_penalty'])
        const dateStr = r['Date'] || null

        // Parse all goal columns
        const homeGoals = parseGoalCol(r['home_goal'])
        const awayGoals = parseGoalCol(r['away_goal'])
        const homePenGoals = parseGoalCol(r['home_penalty_goal'], true)
        const awayPenGoals = parseGoalCol(r['away_penalty_goal'], true)
        // home_own_goal = home player scored OG → goal credited to AWAY team
        const homeOgGoals = parseGoalCol(r['home_own_goal'], false, true)
        // away_own_goal = away player scored OG → goal credited to HOME team
        const awayOgGoals = parseGoalCol(r['away_own_goal'], false, true)

        // Determine FT vs ET score split from goal minutes. Own goals are included:
        // they count towards the FT totals below, so an own goal after minute 90
        // must also mark the match as having gone to extra time.
        const allGoals = [
          ...homeGoals, ...awayGoals,
          ...homePenGoals, ...awayPenGoals,
          ...homeOgGoals, ...awayOgGoals,
        ]
        const hasEt = allGoals.some(g => g.minute !== null && g.minute > 90)

        let scoreFtHome: number | null
        let scoreFtAway: number | null
        let scoreEtHome: number | null = null
        let scoreEtAway: number | null = null

        if (hasEt) {
          // Compute FT from goals in minutes 1–90 (goals without a minute count as FT);
          // the CSV score is then stored as the after-extra-time score.
          const ftGoalCount = (goals: GoalEntry[]) =>
            goals.filter(g => g.minute === null || g.minute <= 90).length
          scoreFtHome = ftGoalCount(homeGoals) + ftGoalCount(homePenGoals) + ftGoalCount(awayOgGoals)
          scoreFtAway = ftGoalCount(awayGoals) + ftGoalCount(awayPenGoals) + ftGoalCount(homeOgGoals)
          scoreEtHome = homeScore
          scoreEtAway = awayScore
        } else {
          scoreFtHome = homeScore
          scoreFtAway = awayScore
        }

        const [matchRow] = await tx.execute(sql`
          INSERT INTO matches (
            tournament_year, round, date, team1_id, team2_id,
            score_ft_home, score_ft_away, score_et_home, score_et_away,
            score_p_home, score_p_away, is_quali_playoff
          ) VALUES (
            ${year}, ${r['Round'] || 'Unknown'}, ${dateStr},
            ${t1Id}, ${t2Id},
            ${scoreFtHome}, ${scoreFtAway}, ${scoreEtHome}, ${scoreEtAway},
            ${homePen}, ${awayPen}, false
          )
          ON CONFLICT (tournament_year, team1_id, team2_id, date, is_quali_playoff) DO UPDATE SET
            round = EXCLUDED.round,
            score_ft_home = EXCLUDED.score_ft_home,
            score_ft_away = EXCLUDED.score_ft_away,
            score_et_home = EXCLUDED.score_et_home,
            score_et_away = EXCLUDED.score_et_away,
            score_p_home = EXCLUDED.score_p_home,
            score_p_away = EXCLUDED.score_p_away
          RETURNING id
        `)
        const matchId = (matchRow as { id: number }).id

        // Goals have no natural key, so replace the match's goals wholesale.
        await tx.execute(sql`DELETE FROM goals WHERE match_id = ${matchId}`)

        // home team goals (+ away player own goals that benefit home)
        for (const g of [...homeGoals, ...homePenGoals, ...awayOgGoals]) {
          await tx.execute(sql`
            INSERT INTO goals (match_id, team_id, player_name, minute, minute_offset, is_penalty, is_own_goal)
            VALUES (${matchId}, ${t1Id}, ${g.name}, ${g.minute}, ${g.offset}, ${g.isPenalty}, ${g.isOwnGoal})
          `)
          totalGoals++
        }
        // away team goals (+ home player own goals that benefit away)
        for (const g of [...awayGoals, ...awayPenGoals, ...homeOgGoals]) {
          await tx.execute(sql`
            INSERT INTO goals (match_id, team_id, player_name, minute, minute_offset, is_penalty, is_own_goal)
            VALUES (${matchId}, ${t2Id}, ${g.name}, ${g.minute}, ${g.offset}, ${g.isPenalty}, ${g.isOwnGoal})
          `)
          totalGoals++
        }
        totalMatches++
      }

      // 3. Update tournament aggregates
      await tx.execute(sql`
        UPDATE tournaments t SET
          total_goals = (
            SELECT COUNT(g.id)::int
            FROM goals g JOIN matches m ON g.match_id = m.id
            WHERE m.tournament_year = t.year AND m.is_quali_playoff = false
          ),
          matches_count = (
            SELECT COUNT(*)::int FROM matches WHERE tournament_year = t.year AND is_quali_playoff = false
          ),
          avg_goals_per_game = (
            SELECT ROUND(COUNT(g.id)::numeric / NULLIF(COUNT(DISTINCT m.id), 0), 2)
            FROM goals g JOIN matches m ON g.match_id = m.id
            WHERE m.tournament_year = t.year AND m.is_quali_playoff = false
          )
        WHERE t.year < 2026
      `)
    })

    console.log(`✅ Seed complete: ${totalMatches} matches, ${totalGoals} goals (1930–2022)`)
  } finally {
    // Release the connection pool on every path, including failures.
    await client.end()
  }
}

// Entry point: report any failure and exit non-zero so the caller can detect it.
run().catch(e => { console.error('Seed failed:', e); process.exit(1) })
|
||||
+99
-205
@@ -6,46 +6,6 @@ import { TEAM_ISO, getIso } from '../lib/iso-codes'
|
||||
const DATABASE_URL = process.env.DATABASE_URL ?? 'postgres://wc:wc@localhost:5432/worldcup'
|
||||
const BASE = 'https://raw.githubusercontent.com/openfootball/worldcup.json/master'
|
||||
|
||||
const YEARS = [
|
||||
1930, 1934, 1938, 1950, 1954, 1958, 1962, 1966, 1970,
|
||||
1974, 1978, 1982, 1986, 1990, 1994, 1998, 2002, 2006,
|
||||
2010, 2014, 2018, 2022, 2026,
|
||||
]
|
||||
|
||||
const HOSTS: Record<number, string> = {
|
||||
1930: 'Uruguay', 1934: 'Italy', 1938: 'France', 1950: 'Brazil',
|
||||
1954: 'Switzerland', 1958: 'Sweden', 1962: 'Chile', 1966: 'England',
|
||||
1970: 'Mexico', 1974: 'Germany', 1978: 'Argentina', 1982: 'Spain',
|
||||
1986: 'Mexico', 1990: 'Italy', 1994: 'USA', 1998: 'France',
|
||||
2002: 'South Korea / Japan', 2006: 'Germany', 2010: 'South Africa',
|
||||
2014: 'Brazil', 2018: 'Russia', 2022: 'Qatar', 2026: 'USA / Canada / Mexico',
|
||||
}
|
||||
|
||||
const WINNERS: Record<number, { winner: string; runnerUp: string; third?: string; fourth?: string }> = {
|
||||
1930: { winner: 'Uruguay', runnerUp: 'Argentina', third: 'USA', fourth: 'Yugoslavia' },
|
||||
1934: { winner: 'Italy', runnerUp: 'Czechoslovakia', third: 'Germany', fourth: 'Austria' },
|
||||
1938: { winner: 'Italy', runnerUp: 'Hungary', third: 'Brazil', fourth: 'Sweden' },
|
||||
1950: { winner: 'Uruguay', runnerUp: 'Brazil', third: 'Sweden', fourth: 'Spain' },
|
||||
1954: { winner: 'Germany', runnerUp: 'Hungary', third: 'Austria', fourth: 'Uruguay' },
|
||||
1958: { winner: 'Brazil', runnerUp: 'Sweden', third: 'France', fourth: 'Germany' },
|
||||
1962: { winner: 'Brazil', runnerUp: 'Czechoslovakia', third: 'Chile', fourth: 'Yugoslavia' },
|
||||
1966: { winner: 'England', runnerUp: 'Germany', third: 'Portugal', fourth: 'Soviet Union' },
|
||||
1970: { winner: 'Brazil', runnerUp: 'Italy', third: 'Germany', fourth: 'Uruguay' },
|
||||
1974: { winner: 'Germany', runnerUp: 'Netherlands', third: 'Poland', fourth: 'Brazil' },
|
||||
1978: { winner: 'Argentina', runnerUp: 'Netherlands', third: 'Brazil', fourth: 'Italy' },
|
||||
1982: { winner: 'Italy', runnerUp: 'Germany', third: 'Poland', fourth: 'France' },
|
||||
1986: { winner: 'Argentina', runnerUp: 'Germany', third: 'France', fourth: 'Belgium' },
|
||||
1990: { winner: 'Germany', runnerUp: 'Argentina', third: 'Italy', fourth: 'England' },
|
||||
1994: { winner: 'Brazil', runnerUp: 'Italy', third: 'Sweden', fourth: 'Bulgaria' },
|
||||
1998: { winner: 'France', runnerUp: 'Brazil', third: 'Croatia', fourth: 'Netherlands' },
|
||||
2002: { winner: 'Brazil', runnerUp: 'Germany', third: 'Turkey', fourth: 'South Korea' },
|
||||
2006: { winner: 'Italy', runnerUp: 'France', third: 'Germany', fourth: 'Portugal' },
|
||||
2010: { winner: 'Spain', runnerUp: 'Netherlands', third: 'Germany', fourth: 'Uruguay' },
|
||||
2014: { winner: 'Germany', runnerUp: 'Argentina', third: 'Netherlands', fourth: 'Brazil' },
|
||||
2018: { winner: 'France', runnerUp: 'Croatia', third: 'Belgium', fourth: 'England' },
|
||||
2022: { winner: 'Argentina', runnerUp: 'France', third: 'Croatia', fourth: 'Morocco' },
|
||||
}
|
||||
|
||||
async function fetchJson(url: string): Promise<unknown> {
|
||||
try {
|
||||
const res = await fetch(url)
|
||||
@@ -76,7 +36,7 @@ async function run() {
|
||||
const client = postgres(DATABASE_URL, { max: 5 })
|
||||
const db = drizzle(client)
|
||||
|
||||
console.log('Creating tables...')
|
||||
// Safety net — seed.ts should have created these already
|
||||
await db.execute(sql`
|
||||
CREATE TABLE IF NOT EXISTS tournaments (
|
||||
year INTEGER PRIMARY KEY,
|
||||
@@ -128,7 +88,8 @@ async function run() {
|
||||
score_p_away INTEGER,
|
||||
is_quali_playoff BOOLEAN DEFAULT false
|
||||
);
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS matches_unique ON matches (tournament_year, team1_id, team2_id, date, is_quali_playoff);
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS matches_unique
|
||||
ON matches (tournament_year, team1_id, team2_id, date, is_quali_playoff);
|
||||
CREATE TABLE IF NOT EXISTS goals (
|
||||
id SERIAL PRIMARY KEY,
|
||||
match_id INTEGER NOT NULL,
|
||||
@@ -163,31 +124,27 @@ async function run() {
|
||||
position TEXT,
|
||||
date_of_birth DATE
|
||||
);
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS squads_unique ON squads (tournament_year, team_id, shirt_number);
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS squads_unique
|
||||
ON squads (tournament_year, team_id, shirt_number);
|
||||
`)
|
||||
|
||||
const TEAM_ALIASES: Record<string, string> = {
|
||||
'West Germany': 'Germany',
|
||||
}
|
||||
|
||||
const teamCache = new Map<string, number>()
|
||||
|
||||
async function upsertTeam(rawName: string, extra?: { iso2?: string; fifaCode?: string; continent?: string; confederation?: string }) {
|
||||
const name = TEAM_ALIASES[rawName] ?? rawName
|
||||
if (teamCache.has(name)) return teamCache.get(name)!
|
||||
const iso2 = extra?.iso2 ?? getIso(name)
|
||||
if (teamCache.has(rawName)) return teamCache.get(rawName)!
|
||||
const iso2 = extra?.iso2 ?? getIso(rawName)
|
||||
const [row] = await db.execute(sql`
|
||||
INSERT INTO teams (name, iso2, fifa_code, continent, confederation)
|
||||
VALUES (${name}, ${iso2 ?? null}, ${extra?.fifaCode ?? null}, ${extra?.continent ?? null}, ${extra?.confederation ?? null})
|
||||
VALUES (${rawName}, ${iso2 ?? null}, ${extra?.fifaCode ?? null}, ${extra?.continent ?? null}, ${extra?.confederation ?? null})
|
||||
ON CONFLICT (name) DO UPDATE SET
|
||||
iso2 = COALESCE(EXCLUDED.iso2, teams.iso2),
|
||||
fifa_code = COALESCE(EXCLUDED.fifa_code, teams.fifa_code),
|
||||
continent = COALESCE(EXCLUDED.continent, teams.continent),
|
||||
iso2 = COALESCE(EXCLUDED.iso2, teams.iso2),
|
||||
fifa_code = COALESCE(EXCLUDED.fifa_code, teams.fifa_code),
|
||||
continent = COALESCE(EXCLUDED.continent, teams.continent),
|
||||
confederation = COALESCE(EXCLUDED.confederation, teams.confederation)
|
||||
RETURNING id
|
||||
`)
|
||||
const id = (row as { id: number }).id
|
||||
teamCache.set(name, id)
|
||||
teamCache.set(rawName, id)
|
||||
return id
|
||||
}
|
||||
|
||||
@@ -210,16 +167,16 @@ async function run() {
|
||||
${isQuali}
|
||||
)
|
||||
ON CONFLICT (tournament_year, team1_id, team2_id, date, is_quali_playoff) DO UPDATE SET
|
||||
round = EXCLUDED.round,
|
||||
round = EXCLUDED.round,
|
||||
time_local = COALESCE(EXCLUDED.time_local, matches.time_local),
|
||||
score_ft_home = COALESCE(EXCLUDED.score_ft_home, matches.score_ft_home),
|
||||
score_ft_away = COALESCE(EXCLUDED.score_ft_away, matches.score_ft_away),
|
||||
score_ht_home = COALESCE(EXCLUDED.score_ht_home, matches.score_ht_home),
|
||||
score_ht_away = COALESCE(EXCLUDED.score_ht_away, matches.score_ht_away),
|
||||
score_et_home = COALESCE(EXCLUDED.score_et_home, matches.score_et_home),
|
||||
score_et_away = COALESCE(EXCLUDED.score_et_away, matches.score_et_away),
|
||||
score_p_home = COALESCE(EXCLUDED.score_p_home, matches.score_p_home),
|
||||
score_p_away = COALESCE(EXCLUDED.score_p_away, matches.score_p_away),
|
||||
time_local = COALESCE(EXCLUDED.time_local, matches.time_local)
|
||||
score_p_home = COALESCE(EXCLUDED.score_p_home, matches.score_p_home),
|
||||
score_p_away = COALESCE(EXCLUDED.score_p_away, matches.score_p_away)
|
||||
RETURNING id
|
||||
`)
|
||||
return (rows[0] as { id: number }).id
|
||||
@@ -238,61 +195,51 @@ async function run() {
|
||||
}
|
||||
}
|
||||
|
||||
for (const year of YEARS) {
|
||||
console.log(`\nSyncing ${year}...`)
|
||||
console.log('\nSyncing 2026...')
|
||||
|
||||
// 1. Upsert tournament
|
||||
const winData = WINNERS[year]
|
||||
await db.execute(sql`
|
||||
INSERT INTO tournaments (year, host, winner, runner_up, third_place, fourth_place)
|
||||
VALUES (${year}, ${HOSTS[year]}, ${winData?.winner ?? null}, ${winData?.runnerUp ?? null},
|
||||
${winData?.third ?? null}, ${winData?.fourth ?? null})
|
||||
ON CONFLICT (year) DO UPDATE SET
|
||||
winner = COALESCE(EXCLUDED.winner, tournaments.winner),
|
||||
runner_up = COALESCE(EXCLUDED.runner_up, tournaments.runner_up)
|
||||
`)
|
||||
// Upsert 2026 tournament row (no winner yet)
|
||||
await db.execute(sql`
|
||||
INSERT INTO tournaments (year, host)
|
||||
VALUES (2026, 'USA / Canada / Mexico')
|
||||
ON CONFLICT (year) DO NOTHING
|
||||
`)
|
||||
|
||||
// 2. Teams enrichment
|
||||
const teamsData = await fetchJson(`${BASE}/${year}/worldcup.teams.json`) as Record<string, unknown>[] | null
|
||||
if (teamsData && Array.isArray(teamsData)) {
|
||||
for (const t of teamsData) {
|
||||
const name = (t.name ?? t.name_normalised) as string
|
||||
const iso2 = (t.flag_icon as string)?.match(/[\uD83C][\uDDE6-\uDDFF][\uD83C][\uDDE6-\uDDFF]/)?.[0]
|
||||
? TEAM_ISO[name as string] ?? getIso(name)
|
||||
: TEAM_ISO[name as string] ?? getIso(name)
|
||||
await upsertTeam(name, {
|
||||
iso2: iso2,
|
||||
fifaCode: t.fifa_code as string,
|
||||
continent: t.continent as string,
|
||||
confederation: t.confed as string,
|
||||
})
|
||||
}
|
||||
// Teams enrichment
|
||||
const teamsData = await fetchJson(`${BASE}/2026/worldcup.teams.json`) as Record<string, unknown>[] | null
|
||||
if (teamsData && Array.isArray(teamsData)) {
|
||||
for (const t of teamsData) {
|
||||
const name = (t.name ?? t.name_normalised) as string
|
||||
await upsertTeam(name, {
|
||||
iso2: TEAM_ISO[name] ?? getIso(name),
|
||||
fifaCode: t.fifa_code as string,
|
||||
continent: t.continent as string,
|
||||
confederation: t.confed as string,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Stadiums
|
||||
const stadiumsData = await fetchJson(`${BASE}/${year}/worldcup.stadiums.json`) as { stadiums?: Record<string, unknown>[] } | null
|
||||
if (stadiumsData?.stadiums) {
|
||||
for (const s of stadiumsData.stadiums) {
|
||||
await db.execute(sql`
|
||||
INSERT INTO stadiums (tournament_year, name, city, country_code, capacity, timezone, coordinates)
|
||||
VALUES (${year}, ${s.name as string}, ${s.city as string}, ${(s.cc as string | undefined) ?? null},
|
||||
${(s.capacity as number | undefined) ?? null}, ${(s.timezone as string | undefined) ?? null}, ${(s.coords as string | undefined) ?? null})
|
||||
ON CONFLICT DO NOTHING
|
||||
`)
|
||||
}
|
||||
// Stadiums
|
||||
const stadiumsData = await fetchJson(`${BASE}/2026/worldcup.stadiums.json`) as { stadiums?: Record<string, unknown>[] } | null
|
||||
if (stadiumsData?.stadiums) {
|
||||
for (const s of stadiumsData.stadiums) {
|
||||
await db.execute(sql`
|
||||
INSERT INTO stadiums (tournament_year, name, city, country_code, capacity, timezone, coordinates)
|
||||
VALUES (2026, ${s.name as string}, ${s.city as string}, ${(s.cc as string | undefined) ?? null},
|
||||
${(s.capacity as number | undefined) ?? null}, ${(s.timezone as string | undefined) ?? null}, ${(s.coords as string | undefined) ?? null})
|
||||
ON CONFLICT DO NOTHING
|
||||
`)
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Main matches
|
||||
const mainData = await fetchJson(`${BASE}/${year}/worldcup.json`) as RawData | null
|
||||
if (!mainData?.matches) { console.log(` No match data`); continue }
|
||||
|
||||
let matchCount = 0, goalCount = 0
|
||||
// Main matches
|
||||
const mainData = await fetchJson(`${BASE}/2026/worldcup.json`) as RawData | null
|
||||
let matchCount = 0, goalCount = 0
|
||||
if (mainData?.matches) {
|
||||
for (const m of mainData.matches) {
|
||||
const t1Id = await upsertTeam(m.team1)
|
||||
const t2Id = await upsertTeam(m.team2)
|
||||
const score = parseScore(m.score)
|
||||
const group = m.group ?? null
|
||||
const matchId = await upsertMatch(year, m.round ?? 'Unknown', group, m.date ?? null, m.time ?? null, t1Id, t2Id, score, false)
|
||||
const matchId = await upsertMatch(2026, m.round ?? 'Unknown', m.group ?? null, m.date ?? null, m.time ?? null, t1Id, t2Id, score, false)
|
||||
if (m.goals1?.length || m.goals2?.length) {
|
||||
await db.execute(sql`DELETE FROM goals WHERE match_id = ${matchId}`)
|
||||
if (m.goals1?.length) await syncGoals(matchId, t1Id, m.goals1, t2Id)
|
||||
@@ -301,108 +248,41 @@ async function run() {
|
||||
matchCount++
|
||||
goalCount += (m.goals1?.length ?? 0) + (m.goals2?.length ?? 0)
|
||||
}
|
||||
|
||||
// 5. Standings (2014, 2018)
|
||||
const standingsData = await fetchJson(`${BASE}/${year}/worldcup.standings.json`) as { groups?: Record<string, unknown>[] } | null
|
||||
if (standingsData?.groups) {
|
||||
for (const grp of standingsData.groups) {
|
||||
const standings = grp.standings as Record<string, unknown>[]
|
||||
for (const s of standings) {
|
||||
const t = s.team as { name: string; code: string }
|
||||
const teamId = await upsertTeam(t.name, { fifaCode: t.code })
|
||||
await db.execute(sql`
|
||||
INSERT INTO group_standings (tournament_year, group_name, team_id, pos, played, won, drawn, lost, goals_for, goals_against, goal_diff, pts)
|
||||
VALUES (${year}, ${grp.name as string}, ${teamId}, ${s.pos as number ?? null},
|
||||
${s.played as number ?? 0}, ${s.won as number ?? 0}, ${s.drawn as number ?? 0}, ${s.lost as number ?? 0},
|
||||
${s.goals_for as number ?? 0}, ${s.goals_against as number ?? 0},
|
||||
${((s.goals_for as number ?? 0) - (s.goals_against as number ?? 0))},
|
||||
${s.pts as number ?? 0})
|
||||
ON CONFLICT (tournament_year, group_name, team_id) DO UPDATE SET
|
||||
pos = EXCLUDED.pos, played = EXCLUDED.played, won = EXCLUDED.won,
|
||||
drawn = EXCLUDED.drawn, lost = EXCLUDED.lost, goals_for = EXCLUDED.goals_for,
|
||||
goals_against = EXCLUDED.goals_against, goal_diff = EXCLUDED.goal_diff, pts = EXCLUDED.pts
|
||||
`)
|
||||
}
|
||||
}
|
||||
} else if (year !== 2026) {
|
||||
// Compute standings from match results for years without standings.json
|
||||
await db.execute(sql`
|
||||
WITH match_results AS (
|
||||
SELECT tournament_year, group_name,
|
||||
team1_id AS team_id,
|
||||
score_ft_home AS gf, score_ft_away AS ga
|
||||
FROM matches WHERE tournament_year = ${year} AND group_name IS NOT NULL AND is_quali_playoff = false AND score_ft_home IS NOT NULL
|
||||
UNION ALL
|
||||
SELECT tournament_year, group_name,
|
||||
team2_id, score_ft_away, score_ft_home
|
||||
FROM matches WHERE tournament_year = ${year} AND group_name IS NOT NULL AND is_quali_playoff = false AND score_ft_home IS NOT NULL
|
||||
)
|
||||
INSERT INTO group_standings (tournament_year, group_name, team_id, played, won, drawn, lost, goals_for, goals_against, goal_diff, pts)
|
||||
SELECT
|
||||
tournament_year, group_name, team_id,
|
||||
COUNT(*)::int, SUM(CASE WHEN gf > ga THEN 1 ELSE 0 END)::int,
|
||||
SUM(CASE WHEN gf = ga THEN 1 ELSE 0 END)::int,
|
||||
SUM(CASE WHEN gf < ga THEN 1 ELSE 0 END)::int,
|
||||
SUM(gf)::int, SUM(ga)::int, SUM(gf - ga)::int,
|
||||
(SUM(CASE WHEN gf > ga THEN 3 WHEN gf = ga THEN 1 ELSE 0 END))::int
|
||||
FROM match_results
|
||||
GROUP BY tournament_year, group_name, team_id
|
||||
ON CONFLICT (tournament_year, group_name, team_id) DO UPDATE SET
|
||||
played = EXCLUDED.played, won = EXCLUDED.won, drawn = EXCLUDED.drawn,
|
||||
lost = EXCLUDED.lost, goals_for = EXCLUDED.goals_for, goals_against = EXCLUDED.goals_against,
|
||||
goal_diff = EXCLUDED.goal_diff, pts = EXCLUDED.pts
|
||||
`)
|
||||
}
|
||||
|
||||
// 6. Squads (2026)
|
||||
const squadsData = await fetchJson(`${BASE}/${year}/worldcup.squads.json`) as Record<string, unknown>[] | null
|
||||
if (squadsData && Array.isArray(squadsData)) {
|
||||
for (const sq of squadsData) {
|
||||
const teamId = await upsertTeam(sq.name as string)
|
||||
for (const p of (sq.players as Record<string, unknown>[])) {
|
||||
await db.execute(sql`
|
||||
INSERT INTO squads (tournament_year, team_id, player_name, shirt_number, position, date_of_birth)
|
||||
VALUES (${year}, ${teamId}, ${p.name as string}, ${p.number as number ?? null},
|
||||
${p.pos as string ?? null}, ${p.date_of_birth as string ?? null})
|
||||
ON CONFLICT (tournament_year, team_id, shirt_number) DO UPDATE SET
|
||||
player_name = EXCLUDED.player_name, position = EXCLUDED.position, date_of_birth = EXCLUDED.date_of_birth
|
||||
`)
|
||||
}
|
||||
}
|
||||
console.log(` Squads loaded for ${year}`)
|
||||
}
|
||||
|
||||
// 7. Quali playoffs (2026)
|
||||
const qualiData = await fetchJson(`${BASE}/${year}/worldcup.quali_playoffs.json`) as RawData | null
|
||||
if (qualiData?.matches) {
|
||||
for (const m of qualiData.matches) {
|
||||
const t1Id = await upsertTeam(m.team1)
|
||||
const t2Id = await upsertTeam(m.team2)
|
||||
const score = parseScore(m.score)
|
||||
const matchId = await upsertMatch(year, m.round ?? 'Qualifier', null, m.date ?? null, m.time ?? null, t1Id, t2Id, score, true)
|
||||
if (m.goals1?.length) await syncGoals(matchId, t1Id, m.goals1, t2Id)
|
||||
if (m.goals2?.length) await syncGoals(matchId, t2Id, m.goals2, t1Id)
|
||||
}
|
||||
console.log(` Quali playoffs: ${qualiData.matches.length} matches`)
|
||||
}
|
||||
|
||||
// 8. Recompute tournament aggregates
|
||||
await db.execute(sql`
|
||||
UPDATE tournaments SET
|
||||
matches_count = (SELECT COUNT(*)::int FROM matches WHERE tournament_year = ${year} AND is_quali_playoff = false),
|
||||
total_goals = (SELECT COALESCE(SUM(score_ft_home + score_ft_away), 0)::int FROM matches WHERE tournament_year = ${year} AND is_quali_playoff = false AND score_ft_home IS NOT NULL),
|
||||
avg_goals_per_game = (
|
||||
SELECT ROUND(COALESCE(SUM(score_ft_home + score_ft_away), 0)::numeric / NULLIF(COUNT(*), 0), 2)
|
||||
FROM matches
|
||||
WHERE tournament_year = ${year} AND is_quali_playoff = false AND score_ft_home IS NOT NULL
|
||||
)
|
||||
WHERE year = ${year}
|
||||
`)
|
||||
|
||||
console.log(` ✓ ${matchCount} matches, ${goalCount} goals`)
|
||||
}
|
||||
|
||||
// Compute 2026 group standings from match results
|
||||
// Squads
|
||||
const squadsData = await fetchJson(`${BASE}/2026/worldcup.squads.json`) as Record<string, unknown>[] | null
|
||||
if (squadsData && Array.isArray(squadsData)) {
|
||||
for (const sq of squadsData) {
|
||||
const teamId = await upsertTeam(sq.name as string)
|
||||
for (const p of (sq.players as Record<string, unknown>[])) {
|
||||
await db.execute(sql`
|
||||
INSERT INTO squads (tournament_year, team_id, player_name, shirt_number, position, date_of_birth)
|
||||
VALUES (2026, ${teamId}, ${p.name as string}, ${p.number as number ?? null},
|
||||
${p.pos as string ?? null}, ${p.date_of_birth as string ?? null})
|
||||
ON CONFLICT (tournament_year, team_id, shirt_number) DO UPDATE SET
|
||||
player_name = EXCLUDED.player_name, position = EXCLUDED.position, date_of_birth = EXCLUDED.date_of_birth
|
||||
`)
|
||||
}
|
||||
}
|
||||
console.log(' Squads loaded for 2026')
|
||||
}
|
||||
|
||||
// Quali playoffs
|
||||
const qualiData = await fetchJson(`${BASE}/2026/worldcup.quali_playoffs.json`) as RawData | null
|
||||
if (qualiData?.matches) {
|
||||
for (const m of qualiData.matches) {
|
||||
const t1Id = await upsertTeam(m.team1)
|
||||
const t2Id = await upsertTeam(m.team2)
|
||||
const score = parseScore(m.score)
|
||||
const matchId = await upsertMatch(2026, m.round ?? 'Qualifier', null, m.date ?? null, m.time ?? null, t1Id, t2Id, score, true)
|
||||
if (m.goals1?.length) await syncGoals(matchId, t1Id, m.goals1, t2Id)
|
||||
if (m.goals2?.length) await syncGoals(matchId, t2Id, m.goals2, t1Id)
|
||||
}
|
||||
console.log(` Quali playoffs: ${qualiData.matches.length} matches`)
|
||||
}
|
||||
|
||||
// Group standings from match results
|
||||
await db.execute(sql`
|
||||
WITH match_results AS (
|
||||
SELECT tournament_year, group_name, team1_id AS team_id, score_ft_home AS gf, score_ft_away AS ga
|
||||
@@ -413,7 +293,8 @@ async function run() {
|
||||
)
|
||||
INSERT INTO group_standings (tournament_year, group_name, team_id, played, won, drawn, lost, goals_for, goals_against, goal_diff, pts)
|
||||
SELECT tournament_year, group_name, team_id,
|
||||
COUNT(*)::int, SUM(CASE WHEN gf > ga THEN 1 ELSE 0 END)::int,
|
||||
COUNT(*)::int,
|
||||
SUM(CASE WHEN gf > ga THEN 1 ELSE 0 END)::int,
|
||||
SUM(CASE WHEN gf = ga THEN 1 ELSE 0 END)::int,
|
||||
SUM(CASE WHEN gf < ga THEN 1 ELSE 0 END)::int,
|
||||
SUM(gf)::int, SUM(ga)::int, SUM(gf - ga)::int,
|
||||
@@ -426,6 +307,19 @@ async function run() {
|
||||
goal_diff = EXCLUDED.goal_diff, pts = EXCLUDED.pts
|
||||
`)
|
||||
|
||||
// Tournament aggregates
|
||||
await db.execute(sql`
|
||||
UPDATE tournaments SET
|
||||
matches_count = (SELECT COUNT(*)::int FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false),
|
||||
total_goals = (SELECT COALESCE(SUM(score_ft_home + score_ft_away), 0)::int FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false AND score_ft_home IS NOT NULL),
|
||||
avg_goals_per_game = (
|
||||
SELECT ROUND(COALESCE(SUM(score_ft_home + score_ft_away), 0)::numeric / NULLIF(COUNT(*), 0), 2)
|
||||
FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false AND score_ft_home IS NOT NULL
|
||||
)
|
||||
WHERE year = 2026
|
||||
`)
|
||||
|
||||
console.log(` ✓ ${matchCount} matches, ${goalCount} goals`)
|
||||
console.log('\n✅ Sync complete!')
|
||||
await client.end()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user