fix: retry failed group subpages, add rate-limit detection in scraper

- Detect HTTP 429 and Wikipedia's plain-text rate-limit response ("You are
  making too many requests") and wait 30s before retrying, rather than
  silently failing
- Increase retry backoff from 3s × attempt to 15s × attempt, and raise the
  default retry count from 5 to 6
- Increase group subpage delay from 1.2s to 3s, year delay from 0.6s to 2s
- Re-scrape 1970, 1978, 1982, 1998, 2002, 2006 and 2022, which had failed
  groups; all groups now complete — e.g. 2002 now has 64 matches including
  Group E (Germany/Klose)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 18:44:54 +02:00
parent d37ebe201e
commit 7e4bf2d07c
18 changed files with 1193 additions and 16 deletions
+9
View File
@@ -26,6 +26,15 @@
"Brazil", "Brazil",
"Czechoslovakia" "Czechoslovakia"
] ]
},
{
"name": "Group 4",
"teams": [
"Peru",
"Bulgaria",
"Germany",
"Morocco"
]
} }
] ]
} }
+209
View File
@@ -751,6 +751,215 @@
} }
], ],
"ground": "Estadio Jalisco, Guadalajara" "ground": "Estadio Jalisco, Guadalajara"
},
{
"round": "Group stage",
"group": "Group 4",
"date": "1970-06-02",
"time": "16:00",
"team1": "Peru",
"team2": "Bulgaria",
"score": {
"ft": [
3,
2
]
},
"goals1": [
{
"name": "Alberto Gallardo",
"minute": 50
},
{
"name": "Héctor Chumpitaz",
"minute": 55
},
{
"name": "Teófilo Cubillas",
"minute": 73
}
],
"goals2": [
{
"name": "Dinko Dermendzhiev",
"minute": 13
},
{
"name": "Hristo Bonev",
"minute": 49
}
],
"ground": "Estadio Nou Camp, León"
},
{
"round": "Group stage",
"group": "Group 4",
"date": "1970-06-03",
"time": "16:00",
"team1": "Germany",
"team2": "Morocco",
"score": {
"ft": [
2,
1
]
},
"goals1": [
{
"name": "Uwe Seeler",
"minute": 56
},
{
"name": "Gerd Müller",
"minute": 80
}
],
"goals2": [
{
"name": "Houmane Jarir",
"minute": 21
}
],
"ground": "Estadio Nou Camp, León"
},
{
"round": "Group stage",
"group": "Group 4",
"date": "1970-06-06",
"time": "16:00",
"team1": "Peru",
"team2": "Morocco",
"score": {
"ft": [
3,
0
]
},
"goals1": [
{
"name": "Teófilo Cubillas",
"minute": 65
},
{
"name": "Teófilo Cubillas",
"minute": 75
},
{
"name": "Roberto Challe",
"minute": 67
}
],
"ground": "Estadio Nou Camp, León"
},
{
"round": "Group stage",
"group": "Group 4",
"date": "1970-06-07",
"time": "12:00",
"team1": "Germany",
"team2": "Bulgaria",
"score": {
"ft": [
5,
2
]
},
"goals1": [
{
"name": "Reinhard Libuda",
"minute": 20
},
{
"name": "Gerd Müller",
"minute": 27
},
{
"name": "Gerd Müller",
"minute": 52,
"penalty": true
},
{
"name": "Gerd Müller",
"minute": 88
},
{
"name": "Uwe Seeler",
"minute": 70
}
],
"goals2": [
{
"name": "Asparuh Nikodimov",
"minute": 12
},
{
"name": "Todor Kolev",
"minute": 89
}
],
"ground": "Estadio Nou Camp, León"
},
{
"round": "Group stage",
"group": "Group 4",
"date": "1970-06-10",
"time": "16:00",
"team1": "Germany",
"team2": "Peru",
"score": {
"ft": [
3,
1
]
},
"goals1": [
{
"name": "Gerd Müller",
"minute": 19
},
{
"name": "Gerd Müller",
"minute": 26
},
{
"name": "Gerd Müller",
"minute": 39
}
],
"goals2": [
{
"name": "Teófilo Cubillas",
"minute": 44
}
],
"ground": "Estadio Nou Camp, León"
},
{
"round": "Group stage",
"group": "Group 4",
"date": "1970-06-11",
"time": "16:00",
"team1": "Bulgaria",
"team2": "Morocco",
"score": {
"ft": [
1,
1
]
},
"goals1": [
{
"name": "Dobromir Zhechev",
"minute": 40
}
],
"goals2": [
{
"name": "Maouhoub Ghazouani",
"minute": 61
}
],
"ground": "Estadio Nou Camp, León"
} }
] ]
} }
+9
View File
@@ -18,6 +18,15 @@
"Mexico" "Mexico"
] ]
}, },
{
"name": "Group 3",
"teams": [
"Austria",
"Spain",
"Brazil",
"Sweden"
]
},
{ {
"name": "Group 4", "name": "Group 4",
"teams": [ "teams": [
+137
View File
@@ -417,6 +417,143 @@
], ],
"ground": "Estadio Gigante de Arroyito, Rosario" "ground": "Estadio Gigante de Arroyito, Rosario"
}, },
{
"round": "Group stage",
"group": "Group 3",
"date": "1978-06-03",
"time": "13:45",
"team1": "Austria",
"team2": "Spain",
"score": {
"ft": [
2,
1
]
},
"goals1": [
{
"name": "Walter Schachner",
"minute": 10
},
{
"name": "Hans Krankl",
"minute": 76
}
],
"goals2": [
{
"name": "Dani",
"minute": 21
}
],
"ground": "Estadio José Amalfitani, Buenos Aires"
},
{
"round": "Group stage",
"group": "Group 3",
"date": "1978-06-03",
"time": "13:45",
"team1": "Brazil",
"team2": "Sweden",
"score": {
"ft": [
1,
1
]
},
"goals1": [
{
"name": "Reinaldo",
"minute": 45
}
],
"goals2": [
{
"name": "Thomas Sjöberg",
"minute": 37
}
],
"ground": "Estadio José Maria Minella, Mar del Plata"
},
{
"round": "Group stage",
"group": "Group 3",
"date": "1978-06-07",
"time": "13:45",
"team1": "Austria",
"team2": "Sweden",
"score": {
"ft": [
1,
0
]
},
"goals1": [
{
"name": "Hans Krankl",
"minute": 42,
"penalty": true
}
],
"ground": "Estadio José Amalfitani, Buenos Aires"
},
{
"round": "Group stage",
"group": "Group 3",
"date": "1978-06-07",
"time": "13:45",
"team1": "Brazil",
"team2": "Spain",
"score": {
"ft": [
0,
0
]
},
"ground": "Estadio José Maria Minella, Mar del Plata"
},
{
"round": "Group stage",
"group": "Group 3",
"date": "1978-06-11",
"time": "13:45",
"team1": "Spain",
"team2": "Sweden",
"score": {
"ft": [
1,
0
]
},
"goals1": [
{
"name": "Juan Manuel Asensi",
"minute": 75
}
],
"ground": "Estadio José Amalfitani, Buenos Aires"
},
{
"round": "Group stage",
"group": "Group 3",
"date": "1978-06-11",
"time": "13:45",
"team1": "Brazil",
"team2": "Austria",
"score": {
"ft": [
1,
0
]
},
"goals1": [
{
"name": "Roberto Dinamite",
"minute": 40
}
],
"ground": "Estadio José Maria Minella, Mar del Plata"
},
{ {
"round": "Group stage", "round": "Group stage",
"group": "Group 4", "group": "Group 4",
+8
View File
@@ -24,6 +24,14 @@
"name": "Estadio Olímpico Chateau Carreras", "name": "Estadio Olímpico Chateau Carreras",
"city": "Córdoba" "city": "Córdoba"
}, },
{
"name": "Estadio José Amalfitani",
"city": "Buenos Aires"
},
{
"name": "Estadio José Maria Minella",
"city": "Mar del Plata"
},
{ {
"name": "Chateau Carreras", "name": "Chateau Carreras",
"city": "Córdoba" "city": "Córdoba"
+8
View File
@@ -54,6 +54,14 @@
"New Zealand" "New Zealand"
] ]
}, },
{
"name": "Group A",
"teams": [
"Poland",
"Belgium",
"Soviet Union"
]
},
{ {
"name": "Group B", "name": "Group B",
"teams": [ "teams": [
+65
View File
@@ -1199,6 +1199,71 @@
], ],
"ground": "Estadio Benito Villamarín, Seville" "ground": "Estadio Benito Villamarín, Seville"
}, },
{
"round": "Second group stage",
"group": "Group A",
"date": "1982-06-28",
"time": "21:00",
"team1": "Poland",
"team2": "Belgium",
"score": {
"ft": [
3,
0
]
},
"goals1": [
{
"name": "Zbigniew Boniek",
"minute": 4
},
{
"name": "Zbigniew Boniek",
"minute": 26
},
{
"name": "Zbigniew Boniek",
"minute": 53
}
],
"ground": "Camp Nou, Barcelona"
},
{
"round": "Second group stage",
"group": "Group A",
"date": "1982-07-01",
"time": "21:00",
"team1": "Belgium",
"team2": "Soviet Union",
"score": {
"ft": [
0,
1
]
},
"goals2": [
{
"name": "Khoren Oganesian",
"minute": 48
}
],
"ground": "Camp Nou, Barcelona"
},
{
"round": "Second group stage",
"group": "Group A",
"date": "1982-07-04",
"time": "21:00",
"team1": "Soviet Union",
"team2": "Poland",
"score": {
"ft": [
0,
0
]
},
"ground": "Camp Nou, Barcelona"
},
{ {
"round": "Second group stage", "round": "Second group stage",
"group": "Group B", "group": "Group B",
+9
View File
@@ -18,6 +18,15 @@
"Austria" "Austria"
] ]
}, },
{
"name": "Group C",
"teams": [
"Saudi Arabia",
"Denmark",
"France",
"South Africa"
]
},
{ {
"name": "Group D", "name": "Group D",
"teams": [ "teams": [
+185
View File
@@ -857,6 +857,191 @@
], ],
"ground": "Stade de la Beaujoire, Nantes" "ground": "Stade de la Beaujoire, Nantes"
}, },
{
"round": "Group stage",
"group": "Group C",
"date": "1998-06-12",
"time": "17:30",
"team1": "Saudi Arabia",
"team2": "Denmark",
"score": {
"ft": [
0,
1
]
},
"goals2": [
{
"name": "Marc Rieper",
"minute": 69
}
],
"ground": "Stade Félix-Bollaert, Lens"
},
{
"round": "Group stage",
"group": "Group C",
"date": "1998-06-12",
"time": "21:00",
"team1": "France",
"team2": "South Africa",
"score": {
"ft": [
3,
0
]
},
"goals1": [
{
"name": "Christophe Dugarry",
"minute": 36
},
{
"name": "Pierre Issa",
"minute": 77,
"owngoal": true
},
{
"name": "Thierry Henry",
"minute": 90,
"offset": 2
}
],
"ground": "Stade Vélodrome, Marseille"
},
{
"round": "Group stage",
"group": "Group C",
"date": "1998-06-18",
"time": "17:30",
"team1": "South Africa",
"team2": "Denmark",
"score": {
"ft": [
1,
1
]
},
"goals1": [
{
"name": "Benni McCarthy",
"minute": 51
}
],
"goals2": [
{
"name": "Allan Nielsen",
"minute": 12
}
],
"ground": "Stade de Toulouse, Toulouse"
},
{
"round": "Group stage",
"group": "Group C",
"date": "1998-06-18",
"time": "21:00",
"team1": "France",
"team2": "Saudi Arabia",
"score": {
"ft": [
4,
0
]
},
"goals1": [
{
"name": "Thierry Henry",
"minute": 37
},
{
"name": "Thierry Henry",
"minute": 78
},
{
"name": "David Trezeguet",
"minute": 68
},
{
"name": "Bixente Lizarazu",
"minute": 85
}
],
"ground": "Stade de France, Saint-Denis"
},
{
"round": "Group stage",
"group": "Group C",
"date": "1998-06-24",
"time": "16:00",
"team1": "France",
"team2": "Denmark",
"score": {
"ft": [
2,
1
]
},
"goals1": [
{
"name": "Youri Djorkaeff",
"minute": 12,
"penalty": true
},
{
"name": "Emmanuel Petit",
"minute": 56
}
],
"goals2": [
{
"name": "Michael Laudrup",
"minute": 42,
"penalty": true
}
],
"ground": "Stade Gerland, Lyon"
},
{
"round": "Group stage",
"group": "Group C",
"date": "1998-06-24",
"time": "16:00",
"team1": "South Africa",
"team2": "Saudi Arabia",
"score": {
"ft": [
2,
2
]
},
"goals1": [
{
"name": "Shaun Bartlett",
"minute": 18
},
{
"name": "Shaun Bartlett",
"minute": 90,
"offset": 3,
"penalty": true
}
],
"goals2": [
{
"name": "Sami Al-Jaber",
"minute": 45,
"offset": 2,
"penalty": true
},
{
"name": "Yousuf Al-Thunayan",
"minute": 74,
"penalty": true
}
],
"ground": "Parc Lescure, Bordeaux"
},
{ {
"round": "Group stage", "round": "Group stage",
"group": "Group D", "group": "Group D",
+9
View File
@@ -36,6 +36,15 @@
"Portugal" "Portugal"
] ]
}, },
{
"name": "Group E",
"teams": [
"Republic of Ireland",
"Cameroon",
"Germany",
"Saudi Arabia"
]
},
{ {
"name": "Group F", "name": "Group F",
"teams": [ "teams": [
+181
View File
@@ -1154,6 +1154,187 @@
], ],
"ground": "Daejeon World Cup Stadium, Daejeon" "ground": "Daejeon World Cup Stadium, Daejeon"
}, },
{
"round": "Group stage",
"group": "Group E",
"date": "2002-06-01",
"time": "15:30",
"team1": "Republic of Ireland",
"team2": "Cameroon",
"score": {
"ft": [
1,
1
]
},
"goals1": [
{
"name": "Matt Holland",
"minute": 52
}
],
"goals2": [
{
"name": "Patrick M'Boma",
"minute": 39
}
],
"ground": "Niigata Stadium, Niigata"
},
{
"round": "Group stage",
"group": "Group E",
"date": "2002-06-01",
"time": "20:30",
"team1": "Germany",
"team2": "Saudi Arabia",
"score": {
"ft": [
8,
0
]
},
"goals1": [
{
"name": "Miroslav Klose",
"minute": 20
},
{
"name": "Miroslav Klose",
"minute": 25
},
{
"name": "Miroslav Klose",
"minute": 70
},
{
"name": "Michael Ballack",
"minute": 40
},
{
"name": "Carsten Jancker",
"minute": 45,
"offset": 1
},
{
"name": "Thomas Linke",
"minute": 73
},
{
"name": "Oliver Bierhoff",
"minute": 84
},
{
"name": "Bernd Schneider",
"minute": 90,
"offset": 1
}
],
"ground": "Sapporo Dome, Sapporo"
},
{
"round": "Group stage",
"group": "Group E",
"date": "2002-06-05",
"time": "20:30",
"team1": "Germany",
"team2": "Republic of Ireland",
"score": {
"ft": [
1,
1
]
},
"goals1": [
{
"name": "Miroslav Klose",
"minute": 19
}
],
"goals2": [
{
"name": "Robbie Keane",
"minute": 90,
"offset": 2
}
],
"ground": "Kashima Soccer Stadium, Ibaraki"
},
{
"round": "Group stage",
"group": "Group E",
"date": "2002-06-06",
"time": "18:00",
"team1": "Cameroon",
"team2": "Saudi Arabia",
"score": {
"ft": [
1,
0
]
},
"goals1": [
{
"name": "Samuel Eto'o",
"minute": 66
}
],
"ground": "Saitama Stadium, Saitama"
},
{
"round": "Group stage",
"group": "Group E",
"date": "2002-06-11",
"time": "20:30",
"team1": "Cameroon",
"team2": "Germany",
"score": {
"ft": [
0,
2
]
},
"goals2": [
{
"name": "Marco Bode",
"minute": 50
},
{
"name": "Miroslav Klose",
"minute": 79
}
],
"ground": "Ecopa Stadium, Shizuoka"
},
{
"round": "Group stage",
"group": "Group E",
"date": "2002-06-11",
"time": "20:30",
"team1": "Saudi Arabia",
"team2": "Republic of Ireland",
"score": {
"ft": [
0,
3
]
},
"goals2": [
{
"name": "Robbie Keane",
"minute": 7
},
{
"name": "Gary Breen",
"minute": 61
},
{
"name": "Damien Duff",
"minute": 87
}
],
"ground": "International Stadium Yokohama, Yokohama"
},
{ {
"round": "Group stage", "round": "Group stage",
"group": "Group F", "group": "Group F",
+18 -10
View File
@@ -77,17 +77,29 @@
"city": "Ulsan" "city": "Ulsan"
}, },
{ {
"name": "Kashima Soccer Stadium", "name": "Niigata Stadium",
"city": "Ibaraki" "city": "Niigata"
},
{
"name": "Wing Stadium",
"city": "Kobe"
}, },
{ {
"name": "Sapporo Dome", "name": "Sapporo Dome",
"city": "Sapporo" "city": "Sapporo"
}, },
{
"name": "Kashima Soccer Stadium",
"city": "Ibaraki"
},
{
"name": "Ecopa Stadium",
"city": "Shizuoka"
},
{
"name": "International Stadium Yokohama",
"city": "Yokohama"
},
{
"name": "Wing Stadium",
"city": "Kobe"
},
{ {
"name": "Niigata Big Swan Stadium", "name": "Niigata Big Swan Stadium",
"city": "Niigata" "city": "Niigata"
@@ -96,10 +108,6 @@
"name": "Ōita Big Eye Stadium", "name": "Ōita Big Eye Stadium",
"city": "Ōita" "city": "Ōita"
}, },
{
"name": "International Stadium Yokohama",
"city": "Yokohama"
},
{ {
"name": "Shizuoka Ecopa Stadium", "name": "Shizuoka Ecopa Stadium",
"city": "Fukuroi, Shizuoka" "city": "Fukuroi, Shizuoka"
+9
View File
@@ -54,6 +54,15 @@
"Croatia" "Croatia"
] ]
}, },
{
"name": "Group G",
"teams": [
"South Korea",
"Togo",
"France",
"Switzerland"
]
},
{ {
"name": "Group H", "name": "Group H",
"teams": [ "teams": [
+148
View File
@@ -1430,6 +1430,154 @@
], ],
"ground": "Gottlieb-Daimler-Stadion, Stuttgart" "ground": "Gottlieb-Daimler-Stadion, Stuttgart"
}, },
{
"round": "Group stage",
"group": "Group G",
"date": "2006-06-13",
"time": "15:00",
"team1": "South Korea",
"team2": "Togo",
"score": {
"ft": [
2,
1
]
},
"goals1": [
{
"name": "Lee Chun-soo",
"minute": 54
},
{
"name": "Ahn Jung-hwan",
"minute": 72
}
],
"goals2": [
{
"name": "Mohamed Kader",
"minute": 31
}
],
"ground": "Waldstadion, Frankfurt"
},
{
"round": "Group stage",
"group": "Group G",
"date": "2006-06-13",
"time": "18:00",
"team1": "France",
"team2": "Switzerland",
"score": {
"ft": [
0,
0
]
},
"ground": "Gottlieb-Daimler-Stadion, Stuttgart"
},
{
"round": "Group stage",
"group": "Group G",
"date": "2006-06-18",
"time": "21:00",
"team1": "France",
"team2": "South Korea",
"score": {
"ft": [
1,
1
]
},
"goals1": [
{
"name": "Thierry Henry",
"minute": 9
}
],
"goals2": [
{
"name": "Park Ji-sung",
"minute": 81
}
],
"ground": "Zentralstadion, Leipzig"
},
{
"round": "Group stage",
"group": "Group G",
"date": "2006-06-19",
"time": "15:00",
"team1": "Togo",
"team2": "Switzerland",
"score": {
"ft": [
0,
2
]
},
"goals2": [
{
"name": "Alexander Frei",
"minute": 16
},
{
"name": "Tranquillo Barnetta",
"minute": 88
}
],
"ground": "Westfalenstadion, Dortmund"
},
{
"round": "Group stage",
"group": "Group G",
"date": "2006-06-23",
"time": "21:00",
"team1": "Togo",
"team2": "France",
"score": {
"ft": [
0,
2
]
},
"goals2": [
{
"name": "Patrick Vieira",
"minute": 55
},
{
"name": "Thierry Henry",
"minute": 61
}
],
"ground": "RheinEnergieStadion, Cologne"
},
{
"round": "Group stage",
"group": "Group G",
"date": "2006-06-23",
"time": "21:00",
"team1": "Switzerland",
"team2": "South Korea",
"score": {
"ft": [
2,
0
]
},
"goals1": [
{
"name": "Philippe Senderos",
"minute": 23
},
{
"name": "Alexander Frei",
"minute": 77
}
],
"ground": "Niedersachsenstadion, Hanover"
},
{ {
"round": "Group stage", "round": "Group stage",
"group": "Group H", "group": "Group H",
+9
View File
@@ -1,5 +1,14 @@
{ {
"groups": [ "groups": [
{
"name": "Group A",
"teams": [
"Qatar",
"Ecuador",
"Senegal",
"Netherlands"
]
},
{ {
"name": "Group B", "name": "Group B",
"teams": [ "teams": [
+171
View File
@@ -540,6 +540,177 @@
], ],
"ground": "Lusail Stadium, Lusail" "ground": "Lusail Stadium, Lusail"
}, },
{
"round": "Group stage",
"group": "Group A",
"date": "2022-11-20",
"time": "19:00",
"team1": "Qatar",
"team2": "Ecuador",
"score": {
"ft": [
0,
2
]
},
"goals2": [
{
"name": "Enner Valencia",
"minute": 16,
"penalty": true
},
{
"name": "Enner Valencia",
"minute": 31
}
],
"ground": "Al Bayt Stadium, Al Khor"
},
{
"round": "Group stage",
"group": "Group A",
"date": "2022-11-21",
"time": "19:00",
"team1": "Senegal",
"team2": "Netherlands",
"score": {
"ft": [
0,
2
]
},
"goals2": [
{
"name": "Cody Gakpo",
"minute": 84
},
{
"name": "Davy Klaassen",
"minute": 90,
"offset": 9
}
],
"ground": "Al Thumama Stadium, Doha"
},
{
"round": "Group stage",
"group": "Group A",
"date": "2022-11-25",
"time": "16:00",
"team1": "Qatar",
"team2": "Senegal",
"score": {
"ft": [
1,
3
]
},
"goals1": [
{
"name": "Mohammed Muntari",
"minute": 78
}
],
"goals2": [
{
"name": "Boulaye Dia",
"minute": 41
},
{
"name": "Famara Diédhiou",
"minute": 48
},
{
"name": "Bamba Dieng",
"minute": 84
}
],
"ground": "Al Thumama Stadium, Doha"
},
{
"round": "Group stage",
"group": "Group A",
"date": "2022-11-25",
"time": "19:00",
"team1": "Netherlands",
"team2": "Ecuador",
"score": {
"ft": [
1,
1
]
},
"goals1": [
{
"name": "Cody Gakpo",
"minute": 6
}
],
"goals2": [
{
"name": "Enner Valencia",
"minute": 49
}
],
"ground": "Khalifa International Stadium, Al Rayyan"
},
{
"round": "Group stage",
"group": "Group A",
"date": "2022-11-29",
"time": "18:00",
"team1": "Ecuador",
"team2": "Senegal",
"score": {
"ft": [
1,
2
]
},
"goals1": [
{
"name": "Moisés Caicedo",
"minute": 67
}
],
"goals2": [
{
"name": "Ismaïla Sarr",
"minute": 44,
"penalty": true
},
{
"name": "Kalidou Koulibaly",
"minute": 70
}
],
"ground": "Khalifa International Stadium, Al Rayyan"
},
{
"round": "Group stage",
"group": "Group A",
"date": "2022-11-29",
"time": "18:00",
"team1": "Netherlands",
"team2": "Qatar",
"score": {
"ft": [
2,
0
]
},
"goals1": [
{
"name": "Cody Gakpo",
"minute": 26
},
{
"name": "Frenkie de Jong",
"minute": 49
}
],
"ground": "Al Bayt Stadium, Al Khor"
},
{ {
"round": "Group stage", "round": "Group stage",
"group": "Group B", "group": "Group B",
+8 -5
View File
@@ -58,14 +58,17 @@ type State = { active: boolean; round: string; group: string | null }
const delay = (ms: number) => new Promise(r => setTimeout(r, ms)) const delay = (ms: number) => new Promise(r => setTimeout(r, ms))
export async function fetchWikiHtml(page: string, retries = 5): Promise<string | null> { export async function fetchWikiHtml(page: string, retries = 6): Promise<string | null> {
const url = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(page)}&format=json&prop=text&disabletoc=1` const url = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(page)}&format=json&prop=text&disabletoc=1`
for (let attempt = 0; attempt < retries; attempt++) { for (let attempt = 0; attempt < retries; attempt++) {
try { try {
if (attempt > 0) await delay(3000 * attempt) if (attempt > 0) await delay(15000 * attempt)
const res = await fetch(url, { headers: { 'User-Agent': 'WorldCupScraper/1.0' } }) const res = await fetch(url, { headers: { 'User-Agent': 'WorldCupScraper/1.0 (worldcup-stats)' } })
if (res.status === 429) { await delay(30000); continue }
if (!res.ok) continue if (!res.ok) continue
const data = await res.json() as { parse?: { text?: { '*': string } } } const text = await res.text()
if (text.toLowerCase().startsWith('you are making')) { await delay(30000); continue }
const data = JSON.parse(text) as { parse?: { text?: { '*': string } } }
const html = data?.parse?.text?.['*'] const html = data?.parse?.text?.['*']
if (html) return html if (html) return html
} catch { } catch {
@@ -414,7 +417,7 @@ export async function scrapeYear(
process.stdout.write(`[skip ${group}] `) process.stdout.write(`[skip ${group}] `)
continue continue
} }
await delay(1200) await delay(3000)
const subHtml = await fetchWikiHtml(page) const subHtml = await fetchWikiHtml(page)
if (!subHtml) { process.stdout.write(`(failed: ${page}) `); continue } if (!subHtml) { process.stdout.write(`(failed: ${page}) `); continue }
+1 -1
View File
@@ -81,7 +81,7 @@ async function main() {
const { matches, stadiums, groups, meta } = await scrapeYear(year, mainHtml) const { matches, stadiums, groups, meta } = await scrapeYear(year, mainHtml)
writeMatches(year, matches, stadiums, groups, meta) writeMatches(year, matches, stadiums, groups, meta)
process.stdout.write(`${matches.length} matches`) process.stdout.write(`${matches.length} matches`)
await delay(600) await delay(2000)
} }
if (doSquads) { if (doSquads) {