fix: use full player names from title attr, preserve UTC offset in match times
Wikipedia abbreviates the goal scorer's display text (e.g. "Müller"), but the link's title attribute (<a title="Thomas Müller">) carries the full name. parseGoals() now prefers the title attribute, stripping a trailing disambiguation suffix such as "(soccer, born 1993)", and falls back to the display text when the title is empty. This ensures Gerd Müller and Thomas Müller get separate player pages. Also preserve the UTC offset from Wikipedia's ftime (stored as e.g. "12:00 UTC-4", with Wikipedia's Unicode minus normalised to an ASCII hyphen) so that isLive() can accurately compute the UTC kickoff time instead of treating local time as UTC. upcomingMatches now orders by SPLIT_PART(timeLocal, ' ', 1), i.e. only the HH:MM part, so the timezone suffix does not affect the sort. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -134,7 +134,7 @@ export const resolvers = {
|
|||||||
sql`${matches.scoreFtHome} IS NULL`,
|
sql`${matches.scoreFtHome} IS NULL`,
|
||||||
eq(matches.isQualiPlayoff, false),
|
eq(matches.isQualiPlayoff, false),
|
||||||
))
|
))
|
||||||
.orderBy(asc(matches.date), sql`${matches.timeLocal} ASC NULLS LAST`, asc(matches.id))
|
.orderBy(asc(matches.date), sql`SPLIT_PART(${matches.timeLocal}, ' ', 1) ASC NULLS LAST`, asc(matches.id))
|
||||||
.limit(limit)
|
.limit(limit)
|
||||||
return Promise.all(rows.map(hydrateMatch))
|
return Promise.all(rows.map(hydrateMatch))
|
||||||
} catch (e) { if (isMissingTable(e)) return []; throw e }
|
} catch (e) { if (isMissingTable(e)) return []; throw e }
|
||||||
|
|||||||
+14
-5
@@ -116,8 +116,12 @@ function parseGoals($: CheerioAPI, $td: Cheerio<Element>): Goal[] {
|
|||||||
let playerName = ''
|
let playerName = ''
|
||||||
$li.find('a').each((_, a) => {
|
$li.find('a').each((_, a) => {
|
||||||
if (!$(a).closest('.fb-goal').length) {
|
if (!$(a).closest('.fb-goal').length) {
|
||||||
const t = $(a).text().trim()
|
const display = $(a).text().trim()
|
||||||
if (t) { playerName = t; return false }
|
if (!display) return
|
||||||
|
// title attr has the full unabbreviated name; strip disambiguation suffix
|
||||||
|
const titleAttr = ($(a).attr('title') ?? '').replace(/\s*\([^)]*\)\s*$/, '').trim()
|
||||||
|
playerName = titleAttr || display
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
if (!playerName) return
|
if (!playerName) return
|
||||||
@@ -155,14 +159,19 @@ function parseGroundParts(ground: string): { name: string; city: string } {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function parseTime12h(text: string): string | undefined {
|
function parseTime12h(text: string): string | undefined {
|
||||||
const m = text.match(/(\d{1,2}):(\d{2})\s*([ap]\.?m\.?)/i)
|
// Normalise Unicode minus (U+2212) used by Wikipedia to ASCII hyphen
|
||||||
if (!m) return text.match(/(\d{2}:\d{2})/)?.[1]
|
const t = text.replace(/−/g, '-')
|
||||||
|
const m = t.match(/(\d{1,2}):(\d{2})\s*([ap]\.?m\.?)/i)
|
||||||
|
if (!m) return t.match(/(\d{2}:\d{2})/)?.[1]
|
||||||
let h = parseInt(m[1])
|
let h = parseInt(m[1])
|
||||||
const min = m[2]
|
const min = m[2]
|
||||||
const isPm = m[3].toLowerCase().replace(/\./g, '').startsWith('p')
|
const isPm = m[3].toLowerCase().replace(/\./g, '').startsWith('p')
|
||||||
if (isPm && h !== 12) h += 12
|
if (isPm && h !== 12) h += 12
|
||||||
else if (!isPm && h === 12) h = 0
|
else if (!isPm && h === 12) h = 0
|
||||||
return `${String(h).padStart(2, '0')}:${min}`
|
const time24 = `${String(h).padStart(2, '0')}:${min}`
|
||||||
|
// Preserve UTC offset so isLive() can compute correct UTC kickoff time
|
||||||
|
const tz = t.match(/UTC([+-]\d+(?:\.\d+)?)/i)
|
||||||
|
return tz ? `${time24} UTC${tz[1]}` : time24
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseBox($: CheerioAPI, $box: Cheerio<Element>, round: string, group: string | null): Match | null {
|
function parseBox($: CheerioAPI, $box: Cheerio<Element>, round: string, group: string | null): Match | null {
|
||||||
|
|||||||
Reference in New Issue
Block a user