fix: parse Wikipedia 12h time format and sort upcoming matches with NULLS LAST

Wikipedia stores match times as "6:00 p.m." (1-digit hour) which didn't
match the \d{2}:\d{2} regex, producing NULL for those matches. Introduced
parseTime12h() to handle 1-2 digit hours + AM/PM and convert to 24h.
Also sort upcomingMatches with NULLS LAST so unscheduled games appear after
timed ones rather than first. Dropped "openfootball" data attribution.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 17:50:30 +02:00
parent 42063cdfda
commit 187ee2e312
3 changed files with 14 additions and 3 deletions
+12 -1
View File
@@ -154,6 +154,17 @@ function parseGroundParts(ground: string): { name: string; city: string } {
return { name: ground, city: '' }
}
/**
 * Extracts a kick-off time from free text and normalizes it to 24-hour `HH:MM`.
 *
 * Two shapes are recognized:
 *  - 12-hour clock with a meridiem marker: `6:00 p.m.`, `12:30 PM`, `9:05 am`
 *  - plain two-digit 24-hour clock: `18:00`
 *
 * @param text Raw text that may contain a time somewhere inside it.
 * @returns The time as zero-padded `HH:MM`, or `undefined` when no time is
 *          found or the digits do not form a valid time of day.
 */
function parseTime12h(text: string): string | undefined {
  // Formats the result, rejecting values that are not a real time of day
  // (hour > 23 or minute > 59) instead of passing them downstream.
  const toClock = (hour: number, minute: string): string | undefined =>
    hour <= 23 && parseInt(minute, 10) <= 59
      ? `${String(hour).padStart(2, '0')}:${minute}`
      : undefined

  // The trailing (?![a-z]) stops a following word such as "Amsterdam" or
  // "AMT" from being mistaken for an a.m./p.m. marker.
  const m = text.match(/(\d{1,2}):(\d{2})\s*([ap])\.?m\.?(?![a-z])/i)
  if (!m) {
    // No meridiem marker: fall back to a plain two-digit 24-hour time.
    const plain = text.match(/(\d{2}):(\d{2})/)
    return plain ? toClock(parseInt(plain[1], 10), plain[2]) : undefined
  }

  let h = parseInt(m[1], 10)
  const isPm = m[3].toLowerCase() === 'p'
  // Shift only genuine 12-hour values: 1–11 p.m. gain 12 hours and 12 a.m. is
  // midnight. An hour that is already 13–23 is treated as 24-hour and left
  // untouched, so "18:00 p.m." yields 18:00 rather than 30:00.
  if (isPm && h < 12) h += 12
  else if (!isPm && h === 12) h = 0
  return toClock(h, m[2])
}
function parseBox($: CheerioAPI, $box: Cheerio<Element>, round: string, group: string | null): Match | null {
const team1 = extractTeam($, $box.find('.fhome'))
const team2 = extractTeam($, $box.find('.faway'))
@@ -161,7 +172,7 @@ function parseBox($: CheerioAPI, $box: Cheerio<Element>, round: string, group: s
const dateStr = $box.find('.bday, .dtstart').first().text().trim() || undefined
const timeText = $box.find('.ftime').first().text().trim()
const timeStr = timeText.match(/(\d{2}:\d{2})/)?.[1]
const timeStr = parseTime12h(timeText)
const scoreText = $box.find('.fscore').first().text().trim()
const hasAET = scoreText.toLowerCase().includes('a.e.t.')
const scoreArr = parseScoreText(scoreText)