fix: use full player names from title attr, preserve UTC offset in match times

Wikipedia abbreviates goal scorer display text (e.g. "Müller") but the
<a title="Thomas Müller"> attribute always has the full name. Switch
parseGoals() to prefer title attr and strip disambiguation suffixes like
"(soccer, born 1993)". This ensures Gerd Müller and Thomas Müller get
separate player pages.

Also preserve the UTC offset from Wikipedia's ftime (e.g. "12:00 UTC-4")
so that isLive() can accurately compute UTC kickoff time instead of
treating local time as UTC. upcomingMatches now orders by
SPLIT_PART(timeLocal, ' ', 1), i.e. only the HH:MM part, so the timezone
suffix does not affect the sort.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 18:14:53 +02:00
parent b141356247
commit 9ce2a4e27c
2 changed files with 15 additions and 6 deletions
+1 -1
View File
@@ -134,7 +134,7 @@ export const resolvers = {
sql`${matches.scoreFtHome} IS NULL`,
eq(matches.isQualiPlayoff, false),
))
.orderBy(asc(matches.date), sql`${matches.timeLocal} ASC NULLS LAST`, asc(matches.id))
.orderBy(asc(matches.date), sql`SPLIT_PART(${matches.timeLocal}, ' ', 1) ASC NULLS LAST`, asc(matches.id))
.limit(limit)
return Promise.all(rows.map(hydrateMatch))
} catch (e) { if (isMissingTable(e)) return []; throw e }
+14 -5
View File
@@ -116,8 +116,12 @@ function parseGoals($: CheerioAPI, $td: Cheerio<Element>): Goal[] {
let playerName = ''
$li.find('a').each((_, a) => {
if (!$(a).closest('.fb-goal').length) {
const t = $(a).text().trim()
if (t) { playerName = t; return false }
const display = $(a).text().trim()
if (!display) return
// title attr has the full unabbreviated name; strip disambiguation suffix
const titleAttr = ($(a).attr('title') ?? '').replace(/\s*\([^)]*\)\s*$/, '').trim()
playerName = titleAttr || display
return false
}
})
if (!playerName) return
@@ -155,14 +159,19 @@ function parseGroundParts(ground: string): { name: string; city: string } {
}
function parseTime12h(text: string): string | undefined {
const m = text.match(/(\d{1,2}):(\d{2})\s*([ap]\.?m\.?)/i)
if (!m) return text.match(/(\d{2}:\d{2})/)?.[1]
// Normalise Unicode minus (U+2212) used by Wikipedia to ASCII hyphen
const t = text.replace(/\u2212/g, '-')
const m = t.match(/(\d{1,2}):(\d{2})\s*([ap]\.?m\.?)/i)
if (!m) return t.match(/(\d{2}:\d{2})/)?.[1]
let h = parseInt(m[1])
const min = m[2]
const isPm = m[3].toLowerCase().replace(/\./g, '').startsWith('p')
if (isPm && h !== 12) h += 12
else if (!isPm && h === 12) h = 0
return `${String(h).padStart(2, '0')}:${min}`
const time24 = `${String(h).padStart(2, '0')}:${min}`
// Preserve UTC offset so isLive() can compute correct UTC kickoff time
const tz = t.match(/UTC([+-]\d+(?:\.\d+)?)/i)
return tz ? `${time24} UTC${tz[1]}` : time24
}
function parseBox($: CheerioAPI, $box: Cheerio<Element>, round: string, group: string | null): Match | null {