2026-06-14 15:36:44 +02:00
import postgres from 'postgres'
import { drizzle } from 'drizzle-orm/postgres-js'
import { sql } from 'drizzle-orm'
2026-06-15 17:23:17 +02:00
import { fetchWikiHtml , scrapeYear , scrapeSquads } from '../lib/wiki-scraper'
import { getIso } from '../lib/iso-codes'
2026-06-14 15:36:44 +02:00
2026-06-15 08:53:20 +02:00
// Connection string for the target database; the script refuses to start without it.
const DATABASE_URL = process.env.DATABASE_URL
if (DATABASE_URL === undefined || DATABASE_URL === '') {
  console.error('ERROR: DATABASE_URL environment variable is not set')
  process.exit(1)
}
2026-06-14 15:36:44 +02:00
2026-06-15 17:23:17 +02:00
// ── DB helpers ─────────────────────────────────────────────────────────────
2026-06-14 15:36:44 +02:00
async function run() {
2026-06-15 09:56:43 +02:00
// Team name → teams.id, filled by upsertTeam so a name is only written once per run.
const teamCache = new Map<string, number>()
// postgres.js client with the pool capped at two connections, wrapped by drizzle for `sql` queries.
const client = postgres(DATABASE_URL!, { max: 2 })
const db = drizzle(client)
2026-06-15 17:23:17 +02:00
/**
 * Resolves a team name to its `teams.id`, inserting the row when the name is new.
 *
 * Ids are memoised in `teamCache`, so each distinct name is written at most once per run.
 * When the name already exists, its stored `iso2` is only replaced by a non-null incoming code.
 *
 * @param rawName Team name; used as-is as the conflict key on `teams.name`.
 * @returns The id returned by the upsert (or the cached id).
 */
async function upsertTeam(rawName: string) {
  if (teamCache.has(rawName)) {
    return teamCache.get(rawName)!
  }

  const isoCode = getIso(rawName) ?? null
  const result = await db.execute(sql`
    INSERT INTO teams (name, iso2)
    VALUES (${rawName}, ${isoCode})
    ON CONFLICT (name) DO UPDATE SET iso2 = COALESCE(EXCLUDED.iso2, teams.iso2)
    RETURNING id
  `)
  const teamId = (result[0] as { id: number }).id
  teamCache.set(rawName, teamId)
  return teamId
}
/**
 * Inserts a match, or refreshes the existing row identified by
 * (tournament_year, team1_id, team2_id, date, is_quali_playoff), and returns its id.
 *
 * On conflict the round is always overwritten; the local time and every score column keep
 * their stored value unless the incoming value is non-null. `group_name` is only written on insert.
 *
 * @param ft Full-time score as [home, away], or undefined when not known.
 * @param et Extra-time score as [home, away], or undefined.
 * @param p Penalty shoot-out score as [home, away], or undefined.
 * @returns The id of the inserted or updated match row.
 */
async function upsertMatch(
  year: number, round: string, group: string | null, dateStr: string | null,
  timeStr: string | null, team1Id: number, team2Id: number,
  ft: [number, number] | undefined, et: [number, number] | undefined, p: [number, number] | undefined,
  isQuali: boolean,
) {
  // Picks one side of an optional score pair, falling back to NULL for the database.
  const side = (pair: [number, number] | undefined, index: 0 | 1): number | null =>
    pair?.[index] ?? null

  const [saved] = await db.execute(sql`
    INSERT INTO matches (tournament_year, round, group_name, date, time_local, team1_id, team2_id,
      score_ft_home, score_ft_away, score_et_home, score_et_away,
      score_p_home, score_p_away, is_quali_playoff)
    VALUES (
      ${year}, ${round}, ${group}, ${dateStr}, ${timeStr}, ${team1Id}, ${team2Id},
      ${side(ft, 0)}, ${side(ft, 1)},
      ${side(et, 0)}, ${side(et, 1)},
      ${side(p, 0)}, ${side(p, 1)},
      ${isQuali}
    )
    ON CONFLICT (tournament_year, team1_id, team2_id, date, is_quali_playoff) DO UPDATE SET
      round = EXCLUDED.round,
      time_local = COALESCE(EXCLUDED.time_local, matches.time_local),
      score_ft_home = COALESCE(EXCLUDED.score_ft_home, matches.score_ft_home),
      score_ft_away = COALESCE(EXCLUDED.score_ft_away, matches.score_ft_away),
      score_et_home = COALESCE(EXCLUDED.score_et_home, matches.score_et_home),
      score_et_away = COALESCE(EXCLUDED.score_et_away, matches.score_et_away),
      score_p_home = COALESCE(EXCLUDED.score_p_home, matches.score_p_home),
      score_p_away = COALESCE(EXCLUDED.score_p_away, matches.score_p_away)
    RETURNING id
  `)
  return (saved as { id: number }).id
}
2026-06-15 17:23:17 +02:00
/**
 * Replaces the stored goals of one match with the given list.
 *
 * The delete and the bulk insert share a transaction, so the match never ends up
 * with its old goals removed but the new ones missing.
 *
 * @param matchId Id of the match whose goals are rewritten.
 * @param goals New goal rows; an empty list just clears the match's goals.
 */
async function replaceGoals(matchId: number, goals: Array<{
  teamId: number; name: string; minute: number | null; offset: number; penalty: boolean; owngoal: boolean
}>) {
  await db.transaction(async tx => {
    await tx.execute(sql`DELETE FROM goals WHERE match_id = ${matchId}`)
    if (goals.length === 0) return

    // One parenthesised value tuple per goal, joined into a single multi-row INSERT.
    const tuples = goals.map(goal =>
      sql`(${matchId}, ${goal.teamId}, ${goal.name}, ${goal.minute}, ${goal.offset}, ${goal.penalty}, ${goal.owngoal})`
    )
    await tx.execute(sql`
      INSERT INTO goals (match_id, team_id, player_name, minute, minute_offset, is_penalty, is_own_goal)
      VALUES ${sql.join(tuples, sql`, `)}
    `)
  })
}
2026-06-15 17:23:17 +02:00
// ── Incremental group detection ────────────────────────────────────────────
/**
 * Returns the names of the 2026 groups in which every stored non-playoff match
 * already has a full-time score, so their sub-pages need not be fetched again.
 *
 * NOTE(review): "every match" means every match currently in the table; this presumably
 * relies on all fixtures of a group being stored before any of them is played — confirm.
 */
async function getCompletedGroups(): Promise<Set<string>> {
  const finishedRows = await db.execute(sql`
    SELECT group_name
    FROM matches
    WHERE tournament_year = 2026
      AND group_name IS NOT NULL
      AND is_quali_playoff = false
    GROUP BY group_name
    HAVING COUNT(*) > 0
      AND COUNT(*) = SUM(CASE WHEN score_ft_home IS NOT NULL THEN 1 ELSE 0 END)
  `)
  const finished = new Set<string>()
  for (const row of finishedRows) {
    finished.add((row as { group_name: string }).group_name)
  }
  return finished
}
// ── Sync 2026 from Wikipedia ───────────────────────────────────────────────
2026-06-15 17:37:15 +02:00
// `--force` wipes everything stored for 2026 before the sync repopulates it.
const force = process.argv.includes('--force')
if (force) {
  console.log('--force: clearing 2026 data...')
  // Single transaction: if any statement fails, the whole wipe is rolled back instead of
  // leaving the 2026 data partially deleted.
  await db.transaction(async tx => {
    // Goals are located through their match, so they must go before the matches rows.
    await tx.execute(sql`DELETE FROM goals WHERE match_id IN (SELECT id FROM matches WHERE tournament_year = 2026)`)
    await tx.execute(sql`DELETE FROM squads WHERE tournament_year = 2026`)
    await tx.execute(sql`DELETE FROM group_standings WHERE tournament_year = 2026`)
    await tx.execute(sql`DELETE FROM stadiums WHERE tournament_year = 2026`)
    await tx.execute(sql`DELETE FROM matches WHERE tournament_year = 2026`)
    // Remove orphaned teams that only appeared in 2026 data, i.e. teams no remaining match refers to.
    // NOT EXISTS rather than NOT IN: with NOT IN, a single NULL team id in matches would make the
    // predicate NULL for every team and silently delete nothing.
    await tx.execute(sql`
      DELETE FROM teams t
      WHERE NOT EXISTS (
        SELECT 1 FROM matches m WHERE m.team1_id = t.id OR m.team2_id = t.id
      )
    `)
  })
}
2026-06-15 17:23:17 +02:00
console.log('\nSyncing 2026 from Wikipedia...')

// Make sure the 2026 tournament row exists; an existing row is left untouched.
const ensureTournament = sql`
  INSERT INTO tournaments (year, host)
  VALUES (2026, 'USA / Canada / Mexico')
  ON CONFLICT (year) DO NOTHING
`
await db.execute(ensureTournament)

// Without the main tournament page there is nothing to sync: close the pool and abort.
const mainHtml = await fetchWikiHtml('2026_FIFA_World_Cup')
if (!mainHtml) {
  console.error(' FAILED to fetch 2026 Wikipedia page')
  await client.end()
  process.exit(1)
}
2026-06-14 15:36:44 +02:00
2026-06-15 17:23:17 +02:00
// Groups that are already fully scored are handed to the scraper as groups to skip.
const completedGroups = await getCompletedGroups()
if (completedGroups.size > 0) {
  const skippedList = Array.from(completedGroups).sort().join(', ')
  console.log(` Skipping completed groups: ${skippedList} `)
}
process.stdout.write(' ')
const { matches, stadiums, meta } = await scrapeYear(2026, mainHtml, { skipGroups: completedGroups })
console.log()

// Stadiums
for (const stadium of stadiums.values()) {
  const city = stadium.city ?? null
  // Rows that already exist are skipped by the conflict clause.
  await db.execute(sql`
    INSERT INTO stadiums (tournament_year, name, city)
    VALUES (2026, ${stadium.name}, ${city})
    ON CONFLICT DO NOTHING
  `)
}
2026-06-14 15:36:44 +02:00
2026-06-15 17:23:17 +02:00
// Matches + goals
let matchCount = 0, goalCount = 0
for (const m of matches) {
  const t1Id = await upsertTeam(m.team1)
  const t2Id = await upsertTeam(m.team2)
  const matchId = await upsertMatch(
    2026, m.round, m.group ?? null, m.date ?? null, m.time ?? null,
    t1Id, t2Id, m.score?.ft, m.score?.et, m.score?.p, false,
  )

  // Goals listed for team 1; an own goal is stored against the opposing team's id.
  const team1Goals = (m.goals1 ?? []).map(g => ({
    teamId: g.owngoal ? t2Id : t1Id,
    name: g.name,
    minute: g.minute ?? null,
    offset: g.offset ?? 0,
    penalty: g.penalty ?? false,
    owngoal: g.owngoal ?? false,
  }))
  // Same mapping for team 2, with the two team ids swapped.
  const team2Goals = (m.goals2 ?? []).map(g => ({
    teamId: g.owngoal ? t1Id : t2Id,
    name: g.name,
    minute: g.minute ?? null,
    offset: g.offset ?? 0,
    penalty: g.penalty ?? false,
    owngoal: g.owngoal ?? false,
  }))
  const goals = [...team1Goals, ...team2Goals]

  // Stored goals are only rewritten when the scrape produced at least one goal for the match.
  if (goals.length > 0) {
    await replaceGoals(matchId, goals)
  }
  matchCount += 1
  goalCount += goals.length
}
2026-06-14 15:36:44 +02:00
2026-06-15 17:23:17 +02:00
// Squads (fetch once; idempotent upsert so safe to re-run)
// NOTE(review): the conflict target includes shirt_number; if that column can be NULL, two NULLs
// would presumably not count as a conflict and re-runs could add duplicate rows — confirm against the schema.
const squadHtml = await fetchWikiHtml('2026_FIFA_World_Cup_squads')
if (squadHtml) {
  const squads = scrapeSquads(squadHtml)
  for (const sq of squads) {
    const teamId = await upsertTeam(sq.name)
    for (const p of sq.players) {
      // Birth dates are stored with all whitespace stripped; a missing date becomes NULL.
      let dob = null
      if (p.date_of_birth) {
        dob = p.date_of_birth.replace(/\s/g, '')
      }
      const shirtNumber = p.number ?? null
      const position = p.pos ?? null
      await db.execute(sql`
        INSERT INTO squads (tournament_year, team_id, player_name, shirt_number, position, date_of_birth)
        VALUES (2026, ${teamId}, ${p.name}, ${shirtNumber}, ${position}, ${dob})
        ON CONFLICT (tournament_year, team_id, shirt_number) DO UPDATE SET
          player_name = EXCLUDED.player_name,
          position = EXCLUDED.position,
          date_of_birth = EXCLUDED.date_of_birth
      `)
    }
  }
  console.log(` Squads: ${squads.length} teams `)
}
2026-06-14 15:36:44 +02:00
2026-06-15 17:23:17 +02:00
// Tournament winner (once the final is played)
if (meta.winner) {
  // The remaining placements are coerced to NULL when absent, matching how every other optional
  // value in this script is bound (`?? null`). postgres.js rejects `undefined` as a query
  // parameter, so a placement the scraper left undefined would otherwise abort the whole sync.
  const runnerUp = meta.runner_up ?? null
  const thirdPlace = meta.third_place ?? null
  const fourthPlace = meta.fourth_place ?? null
  await db.execute(sql`
    UPDATE tournaments SET
      winner = ${meta.winner},
      runner_up = ${runnerUp},
      third_place = ${thirdPlace},
      fourth_place = ${fourthPlace}
    WHERE year = 2026
  `)
}
2026-06-15 17:23:17 +02:00
// Group standings
// Each scored group match contributes two rows to match_results (one per team, with goals
// for/against swapped); these are aggregated per team and upserted, with 3 points for a win
// and 1 for a draw.
const recomputeStandings = sql`
  WITH match_results AS (
    SELECT tournament_year, group_name, team1_id AS team_id, score_ft_home AS gf, score_ft_away AS ga
    FROM matches WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false AND score_ft_home IS NOT NULL
    UNION ALL
    SELECT tournament_year, group_name, team2_id, score_ft_away, score_ft_home
    FROM matches WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false AND score_ft_home IS NOT NULL
  )
  INSERT INTO group_standings (tournament_year, group_name, team_id, played, won, drawn, lost, goals_for, goals_against, goal_diff, pts)
  SELECT tournament_year, group_name, team_id,
    COUNT(*)::int,
    SUM(CASE WHEN gf > ga THEN 1 ELSE 0 END)::int,
    SUM(CASE WHEN gf = ga THEN 1 ELSE 0 END)::int,
    SUM(CASE WHEN gf < ga THEN 1 ELSE 0 END)::int,
    SUM(gf)::int, SUM(ga)::int, SUM(gf - ga)::int,
    SUM(CASE WHEN gf > ga THEN 3 WHEN gf = ga THEN 1 ELSE 0 END)::int
  FROM match_results
  GROUP BY tournament_year, group_name, team_id
  ON CONFLICT (tournament_year, group_name, team_id) DO UPDATE SET
    played = EXCLUDED.played, won = EXCLUDED.won, drawn = EXCLUDED.drawn,
    lost = EXCLUDED.lost, goals_for = EXCLUDED.goals_for, goals_against = EXCLUDED.goals_against,
    goal_diff = EXCLUDED.goal_diff, pts = EXCLUDED.pts
`
await db.execute(recomputeStandings)
2026-06-15 19:47:52 +02:00
// Ensure every team that appears in a group match has a standings row (0-0-0-0 for unplayed teams).
// Teams that already have a row keep it untouched thanks to DO NOTHING.
const backfillEmptyStandings = sql`
  INSERT INTO group_standings (tournament_year, group_name, team_id, played, won, drawn, lost, goals_for, goals_against, goal_diff, pts)
  SELECT DISTINCT 2026, group_name, team1_id, 0, 0, 0, 0, 0, 0, 0, 0
  FROM matches WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false
  UNION
  SELECT DISTINCT 2026, group_name, team2_id, 0, 0, 0, 0, 0, 0, 0, 0
  FROM matches WHERE tournament_year = 2026 AND group_name IS NOT NULL AND is_quali_playoff = false
  ON CONFLICT (tournament_year, group_name, team_id) DO NOTHING
`
await db.execute(backfillEmptyStandings)
2026-06-14 18:43:43 +02:00
// Tournament aggregates
// matches_count covers every non-playoff 2026 match; total_goals and avg_goals_per_game only
// look at matches that already have a full-time score (NULLIF avoids dividing by zero).
const refreshAggregates = sql`
  UPDATE tournaments SET
    matches_count = (SELECT COUNT(*)::int FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false),
    total_goals = (SELECT COALESCE(SUM(score_ft_home + score_ft_away), 0)::int FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false AND score_ft_home IS NOT NULL),
    avg_goals_per_game = (
      SELECT ROUND(COALESCE(SUM(score_ft_home + score_ft_away), 0)::numeric / NULLIF(COUNT(*), 0), 2)
      FROM matches WHERE tournament_year = 2026 AND is_quali_playoff = false AND score_ft_home IS NOT NULL
    )
  WHERE year = 2026
`
await db.execute(refreshAggregates)

// Summary of what this run processed, then release the connection pool.
console.log(` ✓ ${matchCount} matches, ${goalCount} goals `)
console.log('\n✅ Sync complete!')
await client.end()
}
// Entry point: any rejection from run() is logged and turned into a non-zero exit code.
run().catch(err => {
  console.error(err)
  process.exit(1)
})