b141356247
- Add --color-green-mid token (#4a7a55) to @theme for dimmer stat values
- Replace all text-[#hex]/bg-[#hex] arbitrary values with named tokens:
text-green, text-green-light, text-green-sec, text-green-muted,
text-green-dark, text-green-mid, text-text, bg-card, bg-bg, border-border
- Replace rgba(34,197,94,X) inline styles with bg-green/X opacity modifiers
- Convert single-prop style={{ borderColor/background }} to className
- Fix SVG stroke="#dff5e8" → stroke="currentColor"
- Use CSS variables in globals.css base styles (background-color, color)
- Move app/data/wikipedia/ → data/ (project root, not inside Next.js app dir)
- Update Dockerfile, seed.ts, scrape-wikipedia.ts paths accordingly
- Remove unused app/data/world_cup.csv
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
377 lines
14 KiB
TypeScript
377 lines
14 KiB
TypeScript
import postgres from 'postgres'
|
||
import { drizzle } from 'drizzle-orm/postgres-js'
|
||
import { sql } from 'drizzle-orm'
|
||
import { readFileSync, existsSync } from 'fs'
|
||
import path from 'path'
|
||
import { fileURLToPath } from 'url'
|
||
import { getIso } from '../lib/iso-codes'
|
||
import { normalizeTeam } from '../lib/wiki-scraper'
|
||
|
||
// Connection string: taken from the environment when set, otherwise the
// hard-coded local default below.
const DATABASE_URL =
  process.env.DATABASE_URL ?? 'postgres://wc:wc@localhost:5432/worldcup'

// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url))

// Root directory that holds one sub-directory of JSON files per tournament year.
const WC_DIR = path.join(__dirname, '..', 'data', 'wikipedia')
|
||
|
||
// Tournament years to import, in chronological order. Each entry is used as
// the name of a sub-directory under WC_DIR. Note the list jumps from 1938
// straight to 1950.
const YEARS = [
  1930, 1934, 1938,
  1950, 1954, 1958, 1962, 1966,
  1970, 1974, 1978, 1982, 1986,
  1990, 1994, 1998, 2002, 2006,
  2010, 2014, 2018, 2022,
]
|
||
|
||
|
||
/**
 * Reads and parses a JSON file.
 *
 * @param filePath Path of the file to read.
 * @returns The parsed value cast to `T` (the shape is NOT validated), or
 *   `null` when the file does not exist or cannot be read/parsed.
 *
 * A missing file is an expected case and stays silent. A file that exists but
 * cannot be read or parsed is reported with a warning, so a corrupt data file
 * is not mistaken for an absent one by the caller.
 */
function readJson<T>(filePath: string): T | null {
  if (!existsSync(filePath)) return null
  try {
    return JSON.parse(readFileSync(filePath, 'utf-8')) as T
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    console.warn(`readJson: could not load ${filePath}: ${reason}`)
    return null
  }
}
|
||
|
||
// ── Types matching scrape-wikipedia.ts output ──────────────────────────────
|
||
|
||
/** One goal entry from a match's `goals1` / `goals2` list. */
type RawGoal = {
  name: string
  // May arrive as text or number; the seed runs it through parseInt.
  minute?: string | number
  offset?: number
  penalty?: boolean
  owngoal?: boolean
}

/**
 * Score pairs of a match. The seed writes index 0 / index 1 of each pair to
 * the `score_<key>_home` / `score_<key>_away` columns.
 * NOTE(review): presumably ft = full time, ht = half time, et = extra time,
 * p = penalties — confirm against the scraper.
 */
type RawScore = {
  ft?: number[]
  ht?: number[]
  et?: number[]
  p?: number[]
}

/** One match record from `worldcup.json`. */
type RawMatch = {
  round?: string
  date?: string
  time?: string
  team1: string
  team2: string
  score?: RawScore
  goals1?: RawGoal[]
  goals2?: RawGoal[]
  group?: string
  ground?: string
}

/** Top-level shape of `worldcup.json`. */
type RawData = {
  matches: RawMatch[]
}

/** Shape of `worldcup.meta.json`: tournament-level summary fields. */
type RawMeta = {
  host: string
  teams_count: number | null
  winner: string | null
  runner_up: string | null
  third_place: string | null
  fourth_place: string | null
}

/** Shape of `worldcup.stadiums.json`. */
type RawStadiums = {
  stadiums: {
    name: string
    city: string
    cc?: string
    capacity?: number
    timezone?: string
    coords?: string
  }[]
}

/** One team's squad; `worldcup.squads.json` is read as an array of these. */
type RawSquad = {
  name: string
  players: {
    name: string
    number?: number
    pos?: string
    date_of_birth?: string
  }[]
}
|
||
|
||
/**
 * Normalises a raw score value into an object keyed by `ft` / `ht` / `et` / `p`.
 *
 * - A falsy input yields an empty object.
 * - A bare array (a shape the declared type does not cover, but which is
 *   handled at runtime) is treated as the `ft` pair.
 * - Otherwise the four known keys are copied over; absent ones stay `undefined`.
 */
function parseScore(score: RawScore | undefined) {
  if (score) {
    if (Array.isArray(score)) {
      return { ft: score as number[] }
    }
    const { ft, ht, et, p } = score
    return { ft, ht, et, p }
  }
  return {}
}
|
||
|
||
/**
 * Seeds the database with the historical tournament data stored as per-year
 * JSON files under WC_DIR.
 *
 * Steps, in order:
 *  1. Create all tables and unique indexes if they do not exist yet.
 *  2. Unless `--force` / `-f` was passed on the command line, stop early when
 *     any tournament row with year < 2026 already exists. With `--force`,
 *     delete all rows for years < 2026 first.
 *  3. For every year in YEARS: upsert the tournament row (when
 *     `worldcup.meta.json` is present), insert stadiums, upsert matches and
 *     re-insert their goals, then upsert squads.
 *  4. Rebuild group standings from match results and refresh the
 *     per-tournament aggregate columns.
 *
 * The connection is closed in a `finally` block, so it is released on
 * success, on the early exit, and when any statement throws. Errors are not
 * caught here; they propagate to the caller.
 */
async function run(): Promise<void> {
  const client = postgres(DATABASE_URL, { max: 5 })
  const db = drizzle(client)

  try {
    // Create tables
    await db.execute(sql`
      CREATE TABLE IF NOT EXISTS tournaments (
        year INTEGER PRIMARY KEY,
        host TEXT NOT NULL,
        winner TEXT,
        runner_up TEXT,
        third_place TEXT,
        fourth_place TEXT,
        teams_count INTEGER,
        matches_count INTEGER,
        total_goals INTEGER,
        avg_goals_per_game NUMERIC(4,2)
      );
      CREATE TABLE IF NOT EXISTS teams (
        id SERIAL PRIMARY KEY,
        name TEXT UNIQUE NOT NULL,
        iso2 TEXT,
        fifa_code TEXT,
        continent TEXT,
        confederation TEXT
      );
      CREATE TABLE IF NOT EXISTS stadiums (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER,
        name TEXT NOT NULL,
        city TEXT,
        country_code TEXT,
        capacity INTEGER,
        timezone TEXT,
        coordinates TEXT
      );
      CREATE TABLE IF NOT EXISTS matches (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER NOT NULL,
        round TEXT NOT NULL,
        group_name TEXT,
        date DATE,
        time_local TEXT,
        stadium_id INTEGER,
        team1_id INTEGER NOT NULL,
        team2_id INTEGER NOT NULL,
        score_ft_home INTEGER,
        score_ft_away INTEGER,
        score_ht_home INTEGER,
        score_ht_away INTEGER,
        score_et_home INTEGER,
        score_et_away INTEGER,
        score_p_home INTEGER,
        score_p_away INTEGER,
        is_quali_playoff BOOLEAN DEFAULT false
      );
      CREATE UNIQUE INDEX IF NOT EXISTS matches_unique
        ON matches (tournament_year, team1_id, team2_id, date, is_quali_playoff);
      CREATE TABLE IF NOT EXISTS goals (
        id SERIAL PRIMARY KEY,
        match_id INTEGER NOT NULL,
        team_id INTEGER NOT NULL,
        player_name TEXT NOT NULL,
        minute INTEGER,
        minute_offset INTEGER DEFAULT 0,
        is_penalty BOOLEAN DEFAULT false,
        is_own_goal BOOLEAN DEFAULT false
      );
      CREATE TABLE IF NOT EXISTS group_standings (
        tournament_year INTEGER NOT NULL,
        group_name TEXT NOT NULL,
        team_id INTEGER NOT NULL,
        pos INTEGER,
        played INTEGER DEFAULT 0,
        won INTEGER DEFAULT 0,
        drawn INTEGER DEFAULT 0,
        lost INTEGER DEFAULT 0,
        goals_for INTEGER DEFAULT 0,
        goals_against INTEGER DEFAULT 0,
        goal_diff INTEGER DEFAULT 0,
        pts INTEGER DEFAULT 0,
        PRIMARY KEY (tournament_year, group_name, team_id)
      );
      CREATE TABLE IF NOT EXISTS squads (
        id SERIAL PRIMARY KEY,
        tournament_year INTEGER NOT NULL,
        team_id INTEGER NOT NULL,
        player_name TEXT NOT NULL,
        shirt_number INTEGER,
        position TEXT,
        date_of_birth DATE
      );
      CREATE UNIQUE INDEX IF NOT EXISTS squads_unique
        ON squads (tournament_year, team_id, shirt_number);
    `)

    const force = process.argv.includes('--force') || process.argv.includes('-f')

    if (!force) {
      // Any tournament before 2026 counts as "already seeded".
      const existing = await db.execute(sql`SELECT COUNT(*)::int AS cnt FROM tournaments WHERE year < 2026`)
      if ((existing[0] as { cnt: number }).cnt > 0) {
        console.log('✓ Already seeded (historical data present), skipping. Use --force to re-import.')
        return
      }
    } else {
      console.log('--force: clearing historical data...')
      // Goals go first: they are selected through the matches rows deleted below.
      await db.execute(sql`DELETE FROM goals WHERE match_id IN (SELECT id FROM matches WHERE tournament_year < 2026)`)
      await db.execute(sql`DELETE FROM squads WHERE tournament_year < 2026`)
      await db.execute(sql`DELETE FROM group_standings WHERE tournament_year < 2026`)
      await db.execute(sql`DELETE FROM stadiums WHERE tournament_year < 2026`)
      await db.execute(sql`DELETE FROM matches WHERE tournament_year < 2026`)
      await db.execute(sql`DELETE FROM tournaments WHERE year < 2026`)
    }

    console.log('Seeding historical data (1930–2022)...')

    // Normalised team name -> teams.id, so each team is upserted once per run.
    const teamCache = new Map<string, number>()

    // Returns the id of the team with the given (raw) name, inserting it if needed.
    const upsertTeam = async (rawName: string): Promise<number> => {
      const name = normalizeTeam(rawName)
      const cached = teamCache.get(name)
      if (cached !== undefined) return cached

      const iso2 = getIso(name)
      // The no-op DO UPDATE makes RETURNING yield the id of an existing row
      // too; an existing row's iso2 is left untouched.
      const [row] = await db.execute(sql`
        INSERT INTO teams (name, iso2)
        VALUES (${name}, ${iso2 ?? null})
        ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name
        RETURNING id
      `)
      const id = (row as { id: number }).id
      teamCache.set(name, id)
      return id
    }

    // Per-year data from Wikipedia JSON files
    let totalMatches = 0
    let totalGoals = 0

    for (const year of YEARS) {
      const yearDir = path.join(WC_DIR, String(year))
      const mainData = readJson<RawData>(path.join(yearDir, 'worldcup.json'))
      if (!mainData?.matches) {
        console.log(` ${year}: no data file, skipping`)
        continue
      }

      // Tournament row from meta.json
      // NOTE(review): `host` is NOT NULL in the schema, so an empty host in
      // the meta file makes this insert fail.
      const meta = readJson<RawMeta>(path.join(yearDir, 'worldcup.meta.json'))
      if (meta) {
        await db.execute(sql`
          INSERT INTO tournaments (year, host, winner, runner_up, third_place, fourth_place, teams_count)
          VALUES (
            ${year}, ${meta.host || null},
            ${normalizeTeam(meta.winner ?? '') || null}, ${normalizeTeam(meta.runner_up ?? '') || null},
            ${normalizeTeam(meta.third_place ?? '') || null}, ${normalizeTeam(meta.fourth_place ?? '') || null},
            ${meta.teams_count ?? null}
          )
          ON CONFLICT (year) DO UPDATE SET
            host = EXCLUDED.host,
            winner = EXCLUDED.winner,
            runner_up = EXCLUDED.runner_up,
            third_place = EXCLUDED.third_place,
            fourth_place = EXCLUDED.fourth_place,
            teams_count = EXCLUDED.teams_count
        `)
      }

      let matchCount = 0
      let goalCount = 0

      // Stadiums
      // Only name and city are stored; the other RawStadiums fields are not
      // written. NOTE(review): the table has no unique constraint besides the
      // serial id, so ON CONFLICT DO NOTHING cannot deduplicate by name.
      const stadiumsData = readJson<RawStadiums>(path.join(yearDir, 'worldcup.stadiums.json'))
      if (stadiumsData?.stadiums) {
        for (const s of stadiumsData.stadiums) {
          await db.execute(sql`
            INSERT INTO stadiums (tournament_year, name, city)
            VALUES (${year}, ${s.name}, ${s.city ?? null})
            ON CONFLICT DO NOTHING
          `)
        }
      }

      // Matches and goals
      for (const m of mainData.matches) {
        const t1Id = await upsertTeam(m.team1)
        const t2Id = await upsertTeam(m.team2)
        const score = parseScore(m.score)

        // On conflict, COALESCE keeps the stored value wherever the incoming
        // one is NULL. NOTE(review): rows with a NULL date never conflict on
        // matches_unique (NULLs are distinct in a unique index by default).
        const [matchRow] = await db.execute(sql`
          INSERT INTO matches (
            tournament_year, round, group_name, date, time_local,
            team1_id, team2_id,
            score_ft_home, score_ft_away,
            score_ht_home, score_ht_away,
            score_et_home, score_et_away,
            score_p_home, score_p_away,
            is_quali_playoff
          ) VALUES (
            ${year}, ${m.round ?? 'Unknown'}, ${m.group ?? null},
            ${m.date ?? null}, ${m.time ?? null},
            ${t1Id}, ${t2Id},
            ${score.ft?.[0] ?? null}, ${score.ft?.[1] ?? null},
            ${score.ht?.[0] ?? null}, ${score.ht?.[1] ?? null},
            ${score.et?.[0] ?? null}, ${score.et?.[1] ?? null},
            ${score.p?.[0] ?? null}, ${score.p?.[1] ?? null},
            false
          )
          ON CONFLICT (tournament_year, team1_id, team2_id, date, is_quali_playoff) DO UPDATE SET
            round = EXCLUDED.round,
            group_name = COALESCE(EXCLUDED.group_name, matches.group_name),
            time_local = COALESCE(EXCLUDED.time_local, matches.time_local),
            score_ft_home = COALESCE(EXCLUDED.score_ft_home, matches.score_ft_home),
            score_ft_away = COALESCE(EXCLUDED.score_ft_away, matches.score_ft_away),
            score_ht_home = COALESCE(EXCLUDED.score_ht_home, matches.score_ht_home),
            score_ht_away = COALESCE(EXCLUDED.score_ht_away, matches.score_ht_away),
            score_et_home = COALESCE(EXCLUDED.score_et_home, matches.score_et_home),
            score_et_away = COALESCE(EXCLUDED.score_et_away, matches.score_et_away),
            score_p_home = COALESCE(EXCLUDED.score_p_home, matches.score_p_home),
            score_p_away = COALESCE(EXCLUDED.score_p_away, matches.score_p_away)
          RETURNING id
        `)
        const matchId = (matchRow as { id: number }).id

        // Goals (delete + re-insert)
        await db.execute(sql`DELETE FROM goals WHERE match_id = ${matchId}`)

        // Each entry: [goals listed for a side, that side's team id, the opposing team id].
        const sides: [RawGoal[], number, number][] = [
          [m.goals1 ?? [], t1Id, t2Id],
          [m.goals2 ?? [], t2Id, t1Id],
        ]
        for (const [rawGoals, teamId, ogTeamId] of sides) {
          for (const g of rawGoals) {
            if (!g.name) continue
            // Explicit radix so the value is always read as decimal.
            const minute = g.minute != null ? parseInt(String(g.minute), 10) : null
            // Own goals are stored under the other team's id.
            const actualTeamId = g.owngoal ? ogTeamId : teamId
            // A missing, zero or non-numeric minute is stored as NULL.
            await db.execute(sql`
              INSERT INTO goals (match_id, team_id, player_name, minute, minute_offset, is_penalty, is_own_goal)
              VALUES (${matchId}, ${actualTeamId}, ${g.name}, ${!minute || isNaN(minute) ? null : minute},
                      ${g.offset ?? 0}, ${g.penalty ?? false}, ${g.owngoal ?? false})
            `)
            goalCount++
          }
        }

        matchCount++
      }

      // Squads
      // NOTE(review): players without a shirt number never conflict on
      // squads_unique (NULLs are distinct), so they are always inserted.
      const squadsData = readJson<RawSquad[]>(path.join(yearDir, 'worldcup.squads.json'))
      if (squadsData && Array.isArray(squadsData)) {
        for (const sq of squadsData) {
          const teamId = await upsertTeam(sq.name)
          for (const p of sq.players) {
            if (!p.name) continue
            // Strip all whitespace from the date string before handing it to the DATE column.
            const dob = p.date_of_birth ? p.date_of_birth.replace(/\s/g, '') : null
            await db.execute(sql`
              INSERT INTO squads (tournament_year, team_id, player_name, shirt_number, position, date_of_birth)
              VALUES (${year}, ${teamId}, ${p.name}, ${p.number ?? null},
                      ${p.pos ?? null}, ${dob})
              ON CONFLICT (tournament_year, team_id, shirt_number) DO UPDATE SET
                player_name = EXCLUDED.player_name,
                position = EXCLUDED.position,
                date_of_birth = EXCLUDED.date_of_birth
            `)
          }
        }
      }

      console.log(` ${year}: ${matchCount} matches, ${goalCount} goals`)
      totalMatches += matchCount
      totalGoals += goalCount
    }

    // 3. Group standings (computed from match results)
    // Every match with a group name and a full-time score contributes one row
    // per team. `pos` is not computed here and stays NULL.
    // NOTE(review): 3 points per win is applied to every year; confirm whether
    // older tournaments should use their historical points rule instead.
    console.log('Computing group standings...')
    await db.execute(sql`
      DELETE FROM group_standings WHERE tournament_year < 2026
    `)
    await db.execute(sql`
      INSERT INTO group_standings (tournament_year, group_name, team_id, played, won, drawn, lost,
                                   goals_for, goals_against, goal_diff, pts)
      WITH match_results AS (
        SELECT tournament_year, group_name, team1_id AS team_id, score_ft_home AS gf, score_ft_away AS ga
        FROM matches WHERE tournament_year < 2026 AND group_name IS NOT NULL
          AND is_quali_playoff = false AND score_ft_home IS NOT NULL
        UNION ALL
        SELECT tournament_year, group_name, team2_id, score_ft_away, score_ft_home
        FROM matches WHERE tournament_year < 2026 AND group_name IS NOT NULL
          AND is_quali_playoff = false AND score_ft_home IS NOT NULL
      )
      SELECT tournament_year, group_name, team_id,
        COUNT(*)::int,
        SUM(CASE WHEN gf > ga THEN 1 ELSE 0 END)::int,
        SUM(CASE WHEN gf = ga THEN 1 ELSE 0 END)::int,
        SUM(CASE WHEN gf < ga THEN 1 ELSE 0 END)::int,
        SUM(gf)::int, SUM(ga)::int, SUM(gf - ga)::int,
        SUM(CASE WHEN gf > ga THEN 3 WHEN gf = ga THEN 1 ELSE 0 END)::int
      FROM match_results
      GROUP BY tournament_year, group_name, team_id
      ON CONFLICT (tournament_year, group_name, team_id) DO UPDATE SET
        played = EXCLUDED.played, won = EXCLUDED.won, drawn = EXCLUDED.drawn,
        lost = EXCLUDED.lost, goals_for = EXCLUDED.goals_for,
        goals_against = EXCLUDED.goals_against, goal_diff = EXCLUDED.goal_diff,
        pts = EXCLUDED.pts
    `)

    // 4. Tournament aggregates
    // avg_goals_per_game starts from matches and LEFT JOINs goals so that
    // matches without any goal row still count in the denominator; an inner
    // join would drop goalless matches and inflate the average.
    await db.execute(sql`
      UPDATE tournaments t SET
        matches_count = (
          SELECT COUNT(*)::int FROM matches WHERE tournament_year = t.year AND is_quali_playoff = false
        ),
        total_goals = (
          SELECT COUNT(g.id)::int
          FROM goals g JOIN matches m ON g.match_id = m.id
          WHERE m.tournament_year = t.year AND m.is_quali_playoff = false
        ),
        avg_goals_per_game = (
          SELECT ROUND(COUNT(g.id)::numeric / NULLIF(COUNT(DISTINCT m.id), 0), 2)
          FROM matches m LEFT JOIN goals g ON g.match_id = m.id
          WHERE m.tournament_year = t.year AND m.is_quali_playoff = false
            AND m.score_ft_home IS NOT NULL
        )
      WHERE t.year < 2026
    `)

    console.log(`\n✅ Seed complete: ${totalMatches} matches, ${totalGoals} goals (1930–2022)`)
  } finally {
    // Always release the connection pool, including on the early exit and on errors.
    await client.end()
  }
}
|
||
|
||
// Script entry point: run the seed, and on any failure log it and exit non-zero.
run().catch((err: unknown) => {
  console.error('Seed failed:', err)
  process.exit(1)
})
|