fix: use full player names from title attr, preserve UTC offset in match times
Wikipedia abbreviates the display text of goal scorers (e.g. "Müller"), but the link's title attribute (<a title="Thomas Müller">) carries the full name. Switch parseGoals() to prefer the title attribute, stripping trailing disambiguation suffixes such as "(soccer, born 1993)" and falling back to the display text when the title is empty. This ensures Gerd Müller and Thomas Müller get separate player pages.

Also preserve the UTC offset from Wikipedia's ftime (e.g. "12:00 UTC-4") so that isLive() can accurately compute the UTC kickoff time instead of treating local time as UTC. Because timeLocal can now carry a timezone suffix, upcomingMatches sorts by SPLIT_PART(timeLocal, ' ', 1), i.e. the HH:MM part only, so the suffix is ignored.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -134,7 +134,7 @@ export const resolvers = {
|
||||
sql`${matches.scoreFtHome} IS NULL`,
|
||||
eq(matches.isQualiPlayoff, false),
|
||||
))
|
||||
.orderBy(asc(matches.date), sql`${matches.timeLocal} ASC NULLS LAST`, asc(matches.id))
|
||||
.orderBy(asc(matches.date), sql`SPLIT_PART(${matches.timeLocal}, ' ', 1) ASC NULLS LAST`, asc(matches.id))
|
||||
.limit(limit)
|
||||
return Promise.all(rows.map(hydrateMatch))
|
||||
} catch (e) { if (isMissingTable(e)) return []; throw e }
|
||||
|
||||
+14
-5
@@ -116,8 +116,12 @@ function parseGoals($: CheerioAPI, $td: Cheerio<Element>): Goal[] {
|
||||
let playerName = ''
|
||||
$li.find('a').each((_, a) => {
|
||||
if (!$(a).closest('.fb-goal').length) {
|
||||
const t = $(a).text().trim()
|
||||
if (t) { playerName = t; return false }
|
||||
const display = $(a).text().trim()
|
||||
if (!display) return
|
||||
// title attr has the full unabbreviated name; strip disambiguation suffix
|
||||
const titleAttr = ($(a).attr('title') ?? '').replace(/\s*\([^)]*\)\s*$/, '').trim()
|
||||
playerName = titleAttr || display
|
||||
return false
|
||||
}
|
||||
})
|
||||
if (!playerName) return
|
||||
@@ -155,14 +159,19 @@ function parseGroundParts(ground: string): { name: string; city: string } {
|
||||
}
|
||||
|
||||
function parseTime12h(text: string): string | undefined {
|
||||
const m = text.match(/(\d{1,2}):(\d{2})\s*([ap]\.?m\.?)/i)
|
||||
if (!m) return text.match(/(\d{2}:\d{2})/)?.[1]
|
||||
// Normalise Unicode minus (U+2212) used by Wikipedia to ASCII hyphen
|
||||
const t = text.replace(/−/g, '-')
|
||||
const m = t.match(/(\d{1,2}):(\d{2})\s*([ap]\.?m\.?)/i)
|
||||
if (!m) return t.match(/(\d{2}:\d{2})/)?.[1]
|
||||
let h = parseInt(m[1])
|
||||
const min = m[2]
|
||||
const isPm = m[3].toLowerCase().replace(/\./g, '').startsWith('p')
|
||||
if (isPm && h !== 12) h += 12
|
||||
else if (!isPm && h === 12) h = 0
|
||||
return `${String(h).padStart(2, '0')}:${min}`
|
||||
const time24 = `${String(h).padStart(2, '0')}:${min}`
|
||||
// Preserve UTC offset so isLive() can compute correct UTC kickoff time
|
||||
const tz = t.match(/UTC([+-]\d+(?:\.\d+)?)/i)
|
||||
return tz ? `${time24} UTC${tz[1]}` : time24
|
||||
}
|
||||
|
||||
function parseBox($: CheerioAPI, $box: Cheerio<Element>, round: string, group: string | null): Match | null {
|
||||
|
||||
Reference in New Issue
Block a user