From 7e4bf2d07cf88e205fac41f117987bf579645c42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Mon, 15 Jun 2026 18:44:54 +0200 Subject: [PATCH] fix: retry failed group subpages, add rate-limit detection in scraper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Detect Wikipedia plain-text rate-limit response ("You are making too many requests") and wait 30s before retrying, rather than silently failing - Increase inter-attempt delay from 3s to 15s per attempt - Increase group subpage delay from 1.2s to 3s, year delay from 0.6s to 2s - Re-scrape 1982, 1998, 2002, 2006 which had failed groups; all groups now complete — e.g. 2002 now has 64 matches including Group E (Germany/Klose) Co-Authored-By: Claude Sonnet 4.6 --- data/1970/worldcup.groups.json | 9 ++ data/1970/worldcup.json | 209 +++++++++++++++++++++++++++++++ data/1978/worldcup.groups.json | 9 ++ data/1978/worldcup.json | 137 ++++++++++++++++++++ data/1978/worldcup.stadiums.json | 8 ++ data/1982/worldcup.groups.json | 8 ++ data/1982/worldcup.json | 65 ++++++++++ data/1998/worldcup.groups.json | 9 ++ data/1998/worldcup.json | 185 +++++++++++++++++++++++++++ data/2002/worldcup.groups.json | 9 ++ data/2002/worldcup.json | 181 ++++++++++++++++++++++++++ data/2002/worldcup.stadiums.json | 28 +++-- data/2006/worldcup.groups.json | 9 ++ data/2006/worldcup.json | 148 ++++++++++++++++++++++ data/2022/worldcup.groups.json | 9 ++ data/2022/worldcup.json | 171 +++++++++++++++++++++++++ lib/wiki-scraper.ts | 13 +- scripts/scrape-wikipedia.ts | 2 +- 18 files changed, 1193 insertions(+), 16 deletions(-) diff --git a/data/1970/worldcup.groups.json b/data/1970/worldcup.groups.json index c91b832..c7512c4 100644 --- a/data/1970/worldcup.groups.json +++ b/data/1970/worldcup.groups.json @@ -26,6 +26,15 @@ "Brazil", "Czechoslovakia" ] + }, + { + "name": "Group 4", + "teams": [ + "Peru", + "Bulgaria", + "Germany", + "Morocco" + ] } ] } \ No newline at end of file diff --git a/data/1970/worldcup.json b/data/1970/worldcup.json index bf2fb5b..afe1aea 100644 --- a/data/1970/worldcup.json +++ b/data/1970/worldcup.json @@ -751,6 +751,215 @@ } ], "ground": "Estadio Jalisco, Guadalajara" + }, + { + "round": "Group stage", + "group": "Group 4", + "date": "1970-06-02", + "time": "16:00", + "team1": "Peru", + "team2": "Bulgaria", + "score": { + "ft": [ + 3, + 2 + ] + }, + "goals1": [ + { + "name": "Alberto Gallardo", + "minute": 50 + }, + { + "name": "Héctor Chumpitaz", + "minute": 55 + }, + { + "name": "Teófilo Cubillas", + "minute": 73 + } + ], + "goals2": [ + { + "name": "Dinko Dermendzhiev", + "minute": 13 + }, + { + "name": "Hristo Bonev", + "minute": 49 + } + ], + "ground": "Estadio Nou Camp, León" + }, + { + "round": "Group stage", + "group": "Group 4", + "date": "1970-06-03", + "time": "16:00", + "team1": "Germany", + "team2": "Morocco", + "score": { + "ft": [ + 2, + 1 + ] + }, + "goals1": [ + { + "name": "Uwe Seeler", + "minute": 56 + }, + { + "name": "Gerd Müller", + "minute": 80 + } + ], + "goals2": [ + { + "name": "Houmane Jarir", + "minute": 21 + } + ], + "ground": "Estadio Nou Camp, León" + }, + { + "round": "Group stage", + "group": "Group 4", + "date": "1970-06-06", + "time": "16:00", + "team1": "Peru", + "team2": "Morocco", + "score": { + "ft": [ + 3, + 0 + ] + }, + "goals1": [ + { + "name": "Teófilo Cubillas", + "minute": 65 + }, + { + "name": "Teófilo Cubillas", + "minute": 75 + }, + { + "name": "Roberto Challe", + "minute": 67 + } + ], + "ground": "Estadio Nou Camp, León" + }, + { + "round": "Group stage", + "group": "Group 4", + "date": "1970-06-07", + "time": "12:00", + "team1": "Germany", + "team2": "Bulgaria", + "score": { + "ft": [ + 5, + 2 + ] + }, + "goals1": [ + { + "name": "Reinhard Libuda", + "minute": 20 + }, + { + "name": "Gerd Müller", + "minute": 27 + }, + { + "name": "Gerd Müller", + "minute": 52, + "penalty": true + }, + { + "name": "Gerd Müller", + "minute": 88 + }, + { + "name": "Uwe Seeler", + "minute": 70 + } + ], + "goals2": [ + { + "name": "Asparuh Nikodimov", + "minute": 12 + }, + { + "name": "Todor Kolev", + "minute": 89 + } + ], + "ground": "Estadio Nou Camp, León" + }, + { + "round": "Group stage", + "group": "Group 4", + "date": "1970-06-10", + "time": "16:00", + "team1": "Germany", + "team2": "Peru", + "score": { + "ft": [ + 3, + 1 + ] + }, + "goals1": [ + { + "name": "Gerd Müller", + "minute": 19 + }, + { + "name": "Gerd Müller", + "minute": 26 + }, + { + "name": "Gerd Müller", + "minute": 39 + } + ], + "goals2": [ + { + "name": "Teófilo Cubillas", + "minute": 44 + } + ], + "ground": "Estadio Nou Camp, León" + }, + { + "round": "Group stage", + "group": "Group 4", + "date": "1970-06-11", + "time": "16:00", + "team1": "Bulgaria", + "team2": "Morocco", + "score": { + "ft": [ + 1, + 1 + ] + }, + "goals1": [ + { + "name": "Dobromir Zhechev", + "minute": 40 + } + ], + "goals2": [ + { + "name": "Maouhoub Ghazouani", + "minute": 61 + } + ], + "ground": "Estadio Nou Camp, León" } ] } \ No newline at end of file diff --git a/data/1978/worldcup.groups.json b/data/1978/worldcup.groups.json index 0e78088..ac46e37 100644 --- a/data/1978/worldcup.groups.json +++ b/data/1978/worldcup.groups.json @@ -18,6 +18,15 @@ "Mexico" ] }, + { + "name": "Group 3", + "teams": [ + "Austria", + "Spain", + "Brazil", + "Sweden" + ] + }, { "name": "Group 4", "teams": [ diff --git a/data/1978/worldcup.json b/data/1978/worldcup.json index bf66b8e..e275331 100644 --- a/data/1978/worldcup.json +++ b/data/1978/worldcup.json @@ -417,6 +417,143 @@ ], "ground": "Estadio Gigante de Arroyito, Rosario" }, + { + "round": "Group stage", + "group": "Group 3", + "date": "1978-06-03", + "time": "13:45", + "team1": "Austria", + "team2": "Spain", + "score": { + "ft": [ + 2, + 1 + ] + }, + "goals1": [ + { + "name": "Walter Schachner", + "minute": 10 + }, + { + "name": "Hans Krankl", + "minute": 76 + } + ], + "goals2": [ + { + "name": "Dani", + "minute": 21 + } + ], + "ground": "Estadio José Amalfitani, Buenos Aires" + }, + { + "round": "Group stage", + "group": "Group 3", + "date": "1978-06-03", + "time": "13:45", + "team1": "Brazil", + "team2": "Sweden", + "score": { + "ft": [ + 1, + 1 + ] + }, + "goals1": [ + { + "name": "Reinaldo", + "minute": 45 + } + ], + "goals2": [ + { + "name": "Thomas Sjöberg", + "minute": 37 + } + ], + "ground": "Estadio José Maria Minella, Mar del Plata" + }, + { + "round": "Group stage", + "group": "Group 3", + "date": "1978-06-07", + "time": "13:45", + "team1": "Austria", + "team2": "Sweden", + "score": { + "ft": [ + 1, + 0 + ] + }, + "goals1": [ + { + "name": "Hans Krankl", + "minute": 42, + "penalty": true + } + ], + "ground": "Estadio José Amalfitani, Buenos Aires" + }, + { + "round": "Group stage", + "group": "Group 3", + "date": "1978-06-07", + "time": "13:45", + "team1": "Brazil", + "team2": "Spain", + "score": { + "ft": [ + 0, + 0 + ] + }, + "ground": "Estadio José Maria Minella, Mar del Plata" + }, + { + "round": "Group stage", + "group": "Group 3", + "date": "1978-06-11", + "time": "13:45", + "team1": "Spain", + "team2": "Sweden", + "score": { + "ft": [ + 1, + 0 + ] + }, + "goals1": [ + { + "name": "Juan Manuel Asensi", + "minute": 75 + } + ], + "ground": "Estadio José Amalfitani, Buenos Aires" + }, + { + "round": "Group stage", + "group": "Group 3", + "date": "1978-06-11", + "time": "13:45", + "team1": "Brazil", + "team2": "Austria", + "score": { + "ft": [ + 1, + 0 + ] + }, + "goals1": [ + { + "name": "Roberto Dinamite", + "minute": 40 + } + ], + "ground": "Estadio José Maria Minella, Mar del Plata" + }, { "round": "Group stage", "group": "Group 4", diff --git a/data/1978/worldcup.stadiums.json b/data/1978/worldcup.stadiums.json index cea6578..4c01a6d 100644 --- a/data/1978/worldcup.stadiums.json +++ b/data/1978/worldcup.stadiums.json @@ -24,6 +24,14 @@ "name": "Estadio Olímpico Chateau Carreras", "city": "Córdoba" }, + { + "name": "Estadio José Amalfitani", + "city": "Buenos Aires" + }, + { + "name": "Estadio José Maria Minella", + "city": "Mar del Plata" + }, { "name": "Chateau Carreras", "city": "Córdoba" diff --git a/data/1982/worldcup.groups.json b/data/1982/worldcup.groups.json index c67a682..9331401 100644 --- a/data/1982/worldcup.groups.json +++ b/data/1982/worldcup.groups.json @@ -54,6 +54,14 @@ "New Zealand" ] }, + { + "name": "Group A", + "teams": [ + "Poland", + "Belgium", + "Soviet Union" + ] + }, { "name": "Group B", "teams": [ diff --git a/data/1982/worldcup.json b/data/1982/worldcup.json index cf78da0..8e4f93a 100644 --- a/data/1982/worldcup.json +++ b/data/1982/worldcup.json @@ -1199,6 +1199,71 @@ ], "ground": "Estadio Benito Villamarín, Seville" }, + { + "round": "Second group stage", + "group": "Group A", + "date": "1982-06-28", + "time": "21:00", + "team1": "Poland", + "team2": "Belgium", + "score": { + "ft": [ + 3, + 0 + ] + }, + "goals1": [ + { + "name": "Zbigniew Boniek", + "minute": 4 + }, + { + "name": "Zbigniew Boniek", + "minute": 26 + }, + { + "name": "Zbigniew Boniek", + "minute": 53 + } + ], + "ground": "Camp Nou, Barcelona" + }, + { + "round": "Second group stage", + "group": "Group A", + "date": "1982-07-01", + "time": "21:00", + "team1": "Belgium", + "team2": "Soviet Union", + "score": { + "ft": [ + 0, + 1 + ] + }, + "goals2": [ + { + "name": "Khoren Oganesian", + "minute": 48 + } + ], + "ground": "Camp Nou, Barcelona" + }, + { + "round": "Second group stage", + "group": "Group A", + "date": "1982-07-04", + "time": "21:00", + "team1": "Soviet Union", + "team2": "Poland", + "score": { + "ft": [ + 0, + 0 + ] + }, + "ground": "Camp Nou, Barcelona" + }, { "round": "Second group stage", "group": "Group B", diff --git a/data/1998/worldcup.groups.json b/data/1998/worldcup.groups.json index 40c182a..17cc82d 100644 --- a/data/1998/worldcup.groups.json +++ b/data/1998/worldcup.groups.json @@ -18,6 +18,15 @@ "Austria" ] }, + { + "name": "Group C", + "teams": [ + "Saudi Arabia", + "Denmark", + "France", + "South Africa" + ] + }, { "name": "Group D", "teams": [ diff --git a/data/1998/worldcup.json b/data/1998/worldcup.json index 9325e3a..dc738e9 100644 --- a/data/1998/worldcup.json +++ b/data/1998/worldcup.json @@ -857,6 +857,191 @@ ], "ground": "Stade de la Beaujoire, Nantes" }, + { + "round": "Group stage", + "group": "Group C", + "date": "1998-06-12", + "time": "17:30", + "team1": "Saudi Arabia", + "team2": "Denmark", + "score": { + "ft": [ + 0, + 1 + ] + }, + "goals2": [ + { + "name": "Marc Rieper", + "minute": 69 + } + ], + "ground": "Stade Félix-Bollaert, Lens" + }, + { + "round": "Group stage", + "group": "Group C", + "date": "1998-06-12", + "time": "21:00", + "team1": "France", + "team2": "South Africa", + "score": { + "ft": [ + 3, + 0 + ] + }, + "goals1": [ + { + "name": "Christophe Dugarry", + "minute": 36 + }, + { + "name": "Pierre Issa", + "minute": 77, + "owngoal": true + }, + { + "name": "Thierry Henry", + "minute": 90, + "offset": 2 + } + ], + "ground": "Stade Vélodrome, Marseille" + }, + { + "round": "Group stage", + "group": "Group C", + "date": "1998-06-18", + "time": "17:30", + "team1": "South Africa", + "team2": "Denmark", + "score": { + "ft": [ + 1, + 1 + ] + }, + "goals1": [ + { + "name": "Benni McCarthy", + "minute": 51 + } + ], + "goals2": [ + { + "name": "Allan Nielsen", + "minute": 12 + } + ], + "ground": "Stade de Toulouse, Toulouse" + }, + { + "round": "Group stage", + "group": "Group C", + "date": "1998-06-18", + "time": "21:00", + "team1": "France", + "team2": "Saudi Arabia", + "score": { + "ft": [ + 4, + 0 + ] + }, + "goals1": [ + { + "name": "Thierry Henry", + "minute": 37 + }, + { + "name": "Thierry Henry", + "minute": 78 + }, + { + "name": "David Trezeguet", + "minute": 68 + }, + { + "name": "Bixente Lizarazu", + "minute": 85 + } + ], + "ground": "Stade de France, Saint-Denis" + }, + { + "round": "Group stage", + "group": "Group C", + "date": "1998-06-24", + "time": "16:00", + "team1": "France", + "team2": "Denmark", + "score": { + "ft": [ + 2, + 1 + ] + }, + "goals1": [ + { + "name": "Youri Djorkaeff", + "minute": 12, + "penalty": true + }, + { + "name": "Emmanuel Petit", + "minute": 56 + } + ], + "goals2": [ + { + "name": "Michael Laudrup", + "minute": 42, + "penalty": true + } + ], + "ground": "Stade Gerland, Lyon" + }, + { + "round": "Group stage", + "group": "Group C", + "date": "1998-06-24", + "time": "16:00", + "team1": "South Africa", + "team2": "Saudi Arabia", + "score": { + "ft": [ + 2, + 2 + ] + }, + "goals1": [ + { + "name": "Shaun Bartlett", + "minute": 18 + }, + { + "name": "Shaun Bartlett", + "minute": 90, + "offset": 3, + "penalty": true + } + ], + "goals2": [ + { + "name": "Sami Al-Jaber", + "minute": 45, + "offset": 2, + "penalty": true + }, + { + "name": "Yousuf Al-Thunayan", + "minute": 74, + "penalty": true + } + ], + "ground": "Parc Lescure, Bordeaux" + }, { "round": "Group stage", "group": "Group D", diff --git a/data/2002/worldcup.groups.json b/data/2002/worldcup.groups.json index 44c05ee..0ae8f8d 100644 --- a/data/2002/worldcup.groups.json +++ b/data/2002/worldcup.groups.json @@ -36,6 +36,15 @@ "Portugal" ] }, + { + "name": "Group E", + "teams": [ + "Republic of Ireland", + "Cameroon", + "Germany", + "Saudi Arabia" + ] + }, { "name": "Group F", "teams": [ diff --git a/data/2002/worldcup.json b/data/2002/worldcup.json index 7b2e434..7d6baee 100644 --- a/data/2002/worldcup.json +++ b/data/2002/worldcup.json @@ -1154,6 +1154,187 @@ ], "ground": "Daejeon World Cup Stadium, Daejeon" }, + { + "round": "Group stage", + "group": "Group E", + "date": "2002-06-01", + "time": "15:30", + "team1": "Republic of Ireland", + "team2": "Cameroon", + "score": { + "ft": [ + 1, + 1 + ] + }, + "goals1": [ + { + "name": "Matt Holland", + "minute": 52 + } + ], + "goals2": [ + { + "name": "Patrick M'Boma", + "minute": 39 + } + ], + "ground": "Niigata Stadium, Niigata" + }, + { + "round": "Group stage", + "group": "Group E", + "date": "2002-06-01", + "time": "20:30", + "team1": "Germany", + "team2": "Saudi Arabia", + "score": { + "ft": [ + 8, + 0 + ] + }, + "goals1": [ + { + "name": "Miroslav Klose", + "minute": 20 + }, + { + "name": "Miroslav Klose", + "minute": 25 + }, + { + "name": "Miroslav Klose", + "minute": 70 + }, + { + "name": "Michael Ballack", + "minute": 40 + }, + { + "name": "Carsten Jancker", + "minute": 45, + "offset": 1 + }, + { + "name": "Thomas Linke", + "minute": 73 + }, + { + "name": "Oliver Bierhoff", + "minute": 84 + }, + { + "name": "Bernd Schneider", + "minute": 90, + "offset": 1 + } + ], + "ground": "Sapporo Dome, Sapporo" + }, + { + "round": "Group stage", + "group": "Group E", + "date": "2002-06-05", + "time": "20:30", + "team1": "Germany", + "team2": "Republic of Ireland", + "score": { + "ft": [ + 1, + 1 + ] + }, + "goals1": [ + { + "name": "Miroslav Klose", + "minute": 19 + } + ], + "goals2": [ + { + "name": "Robbie Keane", + "minute": 90, + "offset": 2 + } + ], + "ground": "Kashima Soccer Stadium, Ibaraki" + }, + { + "round": "Group stage", + "group": "Group E", + "date": "2002-06-06", + "time": "18:00", + "team1": "Cameroon", + "team2": "Saudi Arabia", + "score": { + "ft": [ + 1, + 0 + ] + }, + "goals1": [ + { + "name": "Samuel Eto'o", + "minute": 66 + } + ], + "ground": "Saitama Stadium, Saitama" + }, + { + "round": "Group stage", + "group": "Group E", + "date": "2002-06-11", + "time": "20:30", + "team1": "Cameroon", + "team2": "Germany", + "score": { + "ft": [ + 0, + 2 + ] + }, + "goals2": [ + { + "name": "Marco Bode", + "minute": 50 + }, + { + "name": "Miroslav Klose", + "minute": 79 + } + ], + "ground": "Ecopa Stadium, Shizuoka" + }, + { + "round": "Group stage", + "group": "Group E", + "date": "2002-06-11", + "time": "20:30", + "team1": "Saudi Arabia", + "team2": "Republic of Ireland", + "score": { + "ft": [ + 0, + 3 + ] + }, + "goals2": [ + { + "name": "Robbie Keane", + "minute": 7 + }, + { + "name": "Gary Breen", + "minute": 61 + }, + { + "name": "Damien Duff", + "minute": 87 + } + ], + "ground": "International Stadium Yokohama, Yokohama" + }, { "round": "Group stage", "group": "Group F", diff --git a/data/2002/worldcup.stadiums.json b/data/2002/worldcup.stadiums.json index cb3486d..02a550b 100644 --- a/data/2002/worldcup.stadiums.json +++ b/data/2002/worldcup.stadiums.json @@ -77,17 +77,29 @@ "city": "Ulsan" }, { - "name": "Kashima Soccer Stadium", - "city": "Ibaraki" - }, - { - "name": "Wing Stadium", - "city": "Kobe" + "name": "Niigata Stadium", + "city": "Niigata" }, { "name": "Sapporo Dome", "city": "Sapporo" }, + { + "name": "Kashima Soccer Stadium", + "city": "Ibaraki" + }, + { + "name": "Ecopa Stadium", + "city": "Shizuoka" + }, + { + "name": "International Stadium Yokohama", + "city": "Yokohama" + }, + { + "name": "Wing Stadium", + "city": "Kobe" + }, { "name": "Niigata Big Swan Stadium", "city": "Niigata" @@ -96,10 +108,6 @@ "name": "Ōita Big Eye Stadium", "city": "Ōita" }, - { - "name": "International Stadium Yokohama", - "city": "Yokohama" - }, { "name": "Shizuoka Ecopa Stadium", "city": "Fukuroi, Shizuoka" diff --git a/data/2006/worldcup.groups.json b/data/2006/worldcup.groups.json index c7d000c..ea8cc93 100644 --- a/data/2006/worldcup.groups.json +++ b/data/2006/worldcup.groups.json @@ -54,6 +54,15 @@ "Croatia" ] }, + { + "name": "Group G", + "teams": [ + "South Korea", + "Togo", + "France", + "Switzerland" + ] + }, { "name": "Group H", "teams": [ diff --git a/data/2006/worldcup.json b/data/2006/worldcup.json index 386a3ae..3c54245 100644 --- a/data/2006/worldcup.json +++ b/data/2006/worldcup.json @@ -1430,6 +1430,154 @@ ], "ground": "Gottlieb-Daimler-Stadion, Stuttgart" }, + { + "round": "Group stage", + "group": "Group G", + "date": "2006-06-13", + "time": "15:00", + "team1": "South Korea", + "team2": "Togo", + "score": { + "ft": [ + 2, + 1 + ] + }, + "goals1": [ + { + "name": "Lee Chun-soo", + "minute": 54 + }, + { + "name": "Ahn Jung-hwan", + "minute": 72 + } + ], + "goals2": [ + { + "name": "Mohamed Kader", + "minute": 31 + } + ], + "ground": "Waldstadion, Frankfurt" + }, + { + "round": "Group stage", + "group": "Group G", + "date": "2006-06-13", + "time": "18:00", + "team1": "France", + "team2": "Switzerland", + "score": { + "ft": [ + 0, + 0 + ] + }, + "ground": "Gottlieb-Daimler-Stadion, Stuttgart" + }, + { + "round": "Group stage", + "group": "Group G", + "date": "2006-06-18", + "time": "21:00", + "team1": "France", + "team2": "South Korea", + "score": { + "ft": [ + 1, + 1 + ] + }, + "goals1": [ + { + "name": "Thierry Henry", + "minute": 9 + } + ], + "goals2": [ + { + "name": "Park Ji-sung", + "minute": 81 + } + ], + "ground": "Zentralstadion, Leipzig" + }, + { + "round": "Group stage", + "group": "Group G", + "date": "2006-06-19", + "time": "15:00", + "team1": "Togo", + "team2": "Switzerland", + "score": { + "ft": [ + 0, + 2 + ] + }, + "goals2": [ + { + "name": "Alexander Frei", + "minute": 16 + }, + { + "name": "Tranquillo Barnetta", + "minute": 88 + } + ], + "ground": "Westfalenstadion, Dortmund" + }, + { + "round": "Group stage", + "group": "Group G", + "date": "2006-06-23", + "time": "21:00", + "team1": "Togo", + "team2": "France", + "score": { + "ft": [ + 0, + 2 + ] + }, + "goals2": [ + { + "name": "Patrick Vieira", + "minute": 55 + }, + { + "name": "Thierry Henry", + "minute": 61 + } + ], + "ground": "RheinEnergieStadion, Cologne" + }, + { + "round": "Group stage", + "group": "Group G", + "date": "2006-06-23", + "time": "21:00", + "team1": "Switzerland", + "team2": "South Korea", + "score": { + "ft": [ + 2, + 0 + ] + }, + "goals1": [ + { + "name": "Philippe Senderos", + "minute": 23 + }, + { + "name": "Alexander Frei", + "minute": 77 + } + ], + "ground": "Niedersachsenstadion, Hanover" + }, { "round": "Group stage", "group": "Group H", diff --git a/data/2022/worldcup.groups.json b/data/2022/worldcup.groups.json index 484d632..38c6b87 100644 --- a/data/2022/worldcup.groups.json +++ b/data/2022/worldcup.groups.json @@ -1,5 +1,14 @@ { "groups": [ + { + "name": "Group A", + "teams": [ + "Qatar", + "Ecuador", + "Senegal", + "Netherlands" + ] + }, { "name": "Group B", "teams": [ diff --git a/data/2022/worldcup.json b/data/2022/worldcup.json index d96b696..25ee447 100644 --- a/data/2022/worldcup.json +++ b/data/2022/worldcup.json @@ -540,6 +540,177 @@ ], "ground": "Lusail Stadium, Lusail" }, + { + "round": "Group stage", + "group": "Group A", + "date": "2022-11-20", + "time": "19:00", + "team1": "Qatar", + "team2": "Ecuador", + "score": { + "ft": [ + 0, + 2 + ] + }, + "goals2": [ + { + "name": "Enner Valencia", + "minute": 16, + "penalty": true + }, + { + "name": "Enner Valencia", + "minute": 31 + } + ], + "ground": "Al Bayt Stadium, Al Khor" + }, + { + "round": "Group stage", + "group": "Group A", + "date": "2022-11-21", + "time": "19:00", + "team1": "Senegal", + "team2": "Netherlands", + "score": { + "ft": [ + 0, + 2 + ] + }, + "goals2": [ + { + "name": "Cody Gakpo", + "minute": 84 + }, + { + "name": "Davy Klaassen", + "minute": 90, + "offset": 9 + } + ], + "ground": "Al Thumama Stadium, Doha" + }, + { + "round": "Group stage", + "group": "Group A", + "date": "2022-11-25", + "time": "16:00", + "team1": "Qatar", + "team2": "Senegal", + "score": { + "ft": [ + 1, + 3 + ] + }, + "goals1": [ + { + "name": "Mohammed Muntari", + "minute": 78 + } + ], + "goals2": [ + { + "name": "Boulaye Dia", + "minute": 41 + }, + { + "name": "Famara Diédhiou", + "minute": 48 + }, + { + "name": "Bamba Dieng", + "minute": 84 + } + ], + "ground": "Al Thumama Stadium, Doha" + }, + { + "round": "Group stage", + "group": "Group A", + "date": "2022-11-25", + "time": "19:00", + "team1": "Netherlands", + "team2": "Ecuador", + "score": { + "ft": [ + 1, + 1 + ] + }, + "goals1": [ + { + "name": "Cody Gakpo", + "minute": 6 + } + ], + "goals2": [ + { + "name": "Enner Valencia", + "minute": 49 + } + ], + "ground": "Khalifa International Stadium, Al Rayyan" + }, + { + "round": "Group stage", + "group": "Group A", + "date": "2022-11-29", + "time": "18:00", + "team1": "Ecuador", + "team2": "Senegal", + "score": { + "ft": [ + 1, + 2 + ] + }, + "goals1": [ + { + "name": "Moisés Caicedo", + "minute": 67 + } + ], + "goals2": [ + { + "name": "Ismaïla Sarr", + "minute": 44, + "penalty": true + }, + { + "name": "Kalidou Koulibaly", + "minute": 70 + } + ], + "ground": "Khalifa International Stadium, Al Rayyan" + }, + { + "round": "Group stage", + "group": "Group A", + "date": "2022-11-29", + "time": "18:00", + "team1": "Netherlands", + "team2": "Qatar", + "score": { + "ft": [ + 2, + 0 + ] + }, + "goals1": [ + { + "name": "Cody Gakpo", + "minute": 26 + }, + { + "name": "Frenkie de Jong", + "minute": 49 + } + ], + "ground": "Al Bayt Stadium, Al Khor" + }, { "round": "Group stage", "group": "Group B", diff --git a/lib/wiki-scraper.ts b/lib/wiki-scraper.ts index d3ffc90..68d63e8 100644 --- a/lib/wiki-scraper.ts +++ b/lib/wiki-scraper.ts @@ -58,14 +58,17 @@ type State = { active: boolean; round: string; group: string | null } const delay = (ms: number) => new Promise(r => setTimeout(r, ms)) -export async function fetchWikiHtml(page: string, retries = 5): Promise { +export async function fetchWikiHtml(page: string, retries = 6): Promise { const url = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(page)}&format=json&prop=text&disabletoc=1` for (let attempt = 0; attempt < retries; attempt++) { try { - if (attempt > 0) await delay(3000 * attempt) - const res = await fetch(url, { headers: { 'User-Agent': 'WorldCupScraper/1.0' } }) + if (attempt > 0) await delay(15000 * attempt) + const res = await fetch(url, { headers: { 'User-Agent': 'WorldCupScraper/1.0 (worldcup-stats)' } }) + if (res.status === 429) { await delay(30000); continue } if (!res.ok) continue - const data = await res.json() as { parse?: { text?: { '*': string } } } + const text = await res.text() + if (text.toLowerCase().startsWith('you are making')) { await delay(30000); continue } + const data = JSON.parse(text) as { parse?: { text?: { '*': string } } } const html = data?.parse?.text?.['*'] if (html) return html } catch { @@ -414,7 +417,7 @@ export async function scrapeYear( process.stdout.write(`[skip ${group}] `) continue } - await delay(1200) + await delay(3000) const subHtml = await fetchWikiHtml(page) if (!subHtml) { process.stdout.write(`(failed: ${page}) `); continue } diff --git a/scripts/scrape-wikipedia.ts b/scripts/scrape-wikipedia.ts index a5136ee..d8ce2ce 100644 --- a/scripts/scrape-wikipedia.ts +++ b/scripts/scrape-wikipedia.ts @@ -81,7 +81,7 @@ async function main() { const { matches, stadiums, groups, meta } = await scrapeYear(year, mainHtml) writeMatches(year, matches, stadiums, groups, meta) process.stdout.write(`${matches.length} matches`) - await delay(600) + await delay(2000) } if (doSquads) {