feat: replace Kaggle CSV with Wikipedia scraper for historical match data
Add scripts/scrape-wikipedia.ts that fetches all 22 World Cups (1930–2022) from English Wikipedia via MediaWiki API, handles group sub-pages, AET/penalty detection, and goal parsing, writing openfootball-format JSON to app/data/openfootball/. Rewrite scripts/seed.ts to read these local JSON files instead of the Kaggle CSV, producing 965 matches and 2716 goals with per-group assignments for all historical tournaments (enabling group standings on tournament pages). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Generated
+185
@@ -17,6 +17,9 @@ importers:
|
||||
'@heroicons/react':
|
||||
specifier: ^2.2.0
|
||||
version: 2.2.0(react@19.2.4)
|
||||
cheerio:
|
||||
specifier: ^1.2.0
|
||||
version: 1.2.0
|
||||
drizzle-orm:
|
||||
specifier: ^0.45.2
|
||||
version: 0.45.2(postgres@3.4.9)
|
||||
@@ -1402,6 +1405,9 @@ packages:
|
||||
engines: {node: '>=6.0.0'}
|
||||
hasBin: true
|
||||
|
||||
boolbase@1.0.0:
|
||||
resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==}
|
||||
|
||||
brace-expansion@1.1.15:
|
||||
resolution: {integrity: sha512-EwOCDEex4quD37XhqM3omwtMoJjr//isUZz1JopUNWms+4Z2ViyM/k1YIRePpoVNnQhENnxtFjLaxNHrT7xIUg==}
|
||||
|
||||
@@ -1444,6 +1450,13 @@ packages:
|
||||
resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
cheerio-select@2.1.0:
|
||||
resolution: {integrity: sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==}
|
||||
|
||||
cheerio@1.2.0:
|
||||
resolution: {integrity: sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==}
|
||||
engines: {node: '>=20.18.1'}
|
||||
|
||||
client-only@0.0.1:
|
||||
resolution: {integrity: sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==}
|
||||
|
||||
@@ -1468,6 +1481,13 @@ packages:
|
||||
resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
|
||||
engines: {node: '>= 8'}
|
||||
|
||||
css-select@5.2.2:
|
||||
resolution: {integrity: sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==}
|
||||
|
||||
css-what@6.2.2:
|
||||
resolution: {integrity: sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
csstype@3.2.3:
|
||||
resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==}
|
||||
|
||||
@@ -1522,6 +1542,19 @@ packages:
|
||||
resolution: {integrity: sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
||||
dom-serializer@2.0.0:
|
||||
resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==}
|
||||
|
||||
domelementtype@2.3.0:
|
||||
resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==}
|
||||
|
||||
domhandler@5.0.3:
|
||||
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
|
||||
engines: {node: '>= 4'}
|
||||
|
||||
domutils@3.2.2:
|
||||
resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==}
|
||||
|
||||
drizzle-kit@0.31.10:
|
||||
resolution: {integrity: sha512-7OZcmQUrdGI+DUNNsKBn1aW8qSoKuTH7d0mYgSP8bAzdFzKoovxEFnoGQp2dVs82EOJeYycqRtciopszwUf8bw==}
|
||||
hasBin: true
|
||||
@@ -1628,10 +1661,25 @@ packages:
|
||||
emoji-regex@9.2.2:
|
||||
resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==}
|
||||
|
||||
encoding-sniffer@0.2.1:
|
||||
resolution: {integrity: sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==}
|
||||
|
||||
enhanced-resolve@5.21.6:
|
||||
resolution: {integrity: sha512-aNnGCvbJ/RIyWo1IuhNdVjnNF+EjH9wpzpNHt+ci/m9He9LJvUN8wrCcXjp9cWsGNAuvSpVFTx/vraAFQ8qGjQ==}
|
||||
engines: {node: '>=10.13.0'}
|
||||
|
||||
entities@4.5.0:
|
||||
resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==}
|
||||
engines: {node: '>=0.12'}
|
||||
|
||||
entities@6.0.1:
|
||||
resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==}
|
||||
engines: {node: '>=0.12'}
|
||||
|
||||
entities@7.0.1:
|
||||
resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==}
|
||||
engines: {node: '>=0.12'}
|
||||
|
||||
es-abstract@1.24.2:
|
||||
resolution: {integrity: sha512-2FpH9Q5i2RRwyEP1AylXe6nYLR5OhaJTZwmlcP0dL/+JCbgg7yyEo/sEK6HeGZRf3dFpWwThaRHVApXSkW3xeg==}
|
||||
engines: {node: '>= 0.4'}
|
||||
@@ -1972,6 +2020,13 @@ packages:
|
||||
hermes-parser@0.25.1:
|
||||
resolution: {integrity: sha512-6pEjquH3rqaI6cYAXYPcz9MS4rY6R4ngRgrgfDshRptUZIc3lw0MCIJIGDj9++mfySOuPTHB4nrSW99BCvOPIA==}
|
||||
|
||||
htmlparser2@10.1.0:
|
||||
resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==}
|
||||
|
||||
iconv-lite@0.6.3:
|
||||
resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
||||
ignore@5.3.2:
|
||||
resolution: {integrity: sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==}
|
||||
engines: {node: '>= 4'}
|
||||
@@ -2318,6 +2373,9 @@ packages:
|
||||
resolution: {integrity: sha512-Uzmd6LXpouKo8EUK68IjH4+E01w/hXyV3R3g/geCJo+rXLNfh1xucB+LOzYEOQPSiUK3h/xZf0cQGcSsmyL2Og==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
nth-check@2.1.1:
|
||||
resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==}
|
||||
|
||||
object-assign@4.1.1:
|
||||
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
@@ -2373,6 +2431,15 @@ packages:
|
||||
resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==}
|
||||
engines: {node: '>=6'}
|
||||
|
||||
parse5-htmlparser2-tree-adapter@7.1.0:
|
||||
resolution: {integrity: sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==}
|
||||
|
||||
parse5-parser-stream@7.1.2:
|
||||
resolution: {integrity: sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==}
|
||||
|
||||
parse5@7.3.0:
|
||||
resolution: {integrity: sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==}
|
||||
|
||||
path-exists@4.0.0:
|
||||
resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==}
|
||||
engines: {node: '>=8'}
|
||||
@@ -2479,6 +2546,9 @@ packages:
|
||||
resolution: {integrity: sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw==}
|
||||
engines: {node: '>= 0.4'}
|
||||
|
||||
safer-buffer@2.1.2:
|
||||
resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==}
|
||||
|
||||
scheduler@0.27.0:
|
||||
resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==}
|
||||
|
||||
@@ -2672,6 +2742,10 @@ packages:
|
||||
undici-types@6.21.0:
|
||||
resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
|
||||
|
||||
undici@7.27.2:
|
||||
resolution: {integrity: sha512-uZsKNuzQxDMUY6M3pIMvy5tvlGmtq8XJ2oLAkfRKGNu+1VQAIvLy2xIVG5ATZl5wDXl/tddByAWCizRbOme+TA==}
|
||||
engines: {node: '>=20.18.1'}
|
||||
|
||||
unrs-resolver@1.12.2:
|
||||
resolution: {integrity: sha512-dmlRxBJJayXjqTwC+JtF1HhJmgf3ftQ3YejFcZrf4+KKtJv0qDsK1pjqaaVjG7wJ5NJ6UVP1OqRMQ71Z4C3rxQ==}
|
||||
|
||||
@@ -2687,6 +2761,15 @@ packages:
|
||||
urlpattern-polyfill@10.1.0:
|
||||
resolution: {integrity: sha512-IGjKp/o0NL3Bso1PymYURCJxMPNAf/ILOpendP9f5B6e1rTJgdgiOvgfoT8VxCAdY+Wisb9uhGaJJf3yZ2V9nw==}
|
||||
|
||||
whatwg-encoding@3.1.1:
|
||||
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
|
||||
engines: {node: '>=18'}
|
||||
deprecated: Use @exodus/bytes instead for a more spec-conformant and faster implementation
|
||||
|
||||
whatwg-mimetype@4.0.0:
|
||||
resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
which-boxed-primitive@1.1.1:
|
||||
resolution: {integrity: sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA==}
|
||||
engines: {node: '>= 0.4'}
|
||||
@@ -3829,6 +3912,8 @@ snapshots:
|
||||
|
||||
baseline-browser-mapping@2.10.37: {}
|
||||
|
||||
boolbase@1.0.0: {}
|
||||
|
||||
brace-expansion@1.1.15:
|
||||
dependencies:
|
||||
balanced-match: 1.0.2
|
||||
@@ -3878,6 +3963,29 @@ snapshots:
|
||||
ansi-styles: 4.3.0
|
||||
supports-color: 7.2.0
|
||||
|
||||
cheerio-select@2.1.0:
|
||||
dependencies:
|
||||
boolbase: 1.0.0
|
||||
css-select: 5.2.2
|
||||
css-what: 6.2.2
|
||||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
domutils: 3.2.2
|
||||
|
||||
cheerio@1.2.0:
|
||||
dependencies:
|
||||
cheerio-select: 2.1.0
|
||||
dom-serializer: 2.0.0
|
||||
domhandler: 5.0.3
|
||||
domutils: 3.2.2
|
||||
encoding-sniffer: 0.2.1
|
||||
htmlparser2: 10.1.0
|
||||
parse5: 7.3.0
|
||||
parse5-htmlparser2-tree-adapter: 7.1.0
|
||||
parse5-parser-stream: 7.1.2
|
||||
undici: 7.27.2
|
||||
whatwg-mimetype: 4.0.0
|
||||
|
||||
client-only@0.0.1: {}
|
||||
|
||||
color-convert@2.0.1:
|
||||
@@ -3900,6 +4008,16 @@ snapshots:
|
||||
shebang-command: 2.0.0
|
||||
which: 2.0.2
|
||||
|
||||
css-select@5.2.2:
|
||||
dependencies:
|
||||
boolbase: 1.0.0
|
||||
css-what: 6.2.2
|
||||
domhandler: 5.0.3
|
||||
domutils: 3.2.2
|
||||
nth-check: 2.1.1
|
||||
|
||||
css-what@6.2.2: {}
|
||||
|
||||
csstype@3.2.3: {}
|
||||
|
||||
damerau-levenshtein@1.0.8: {}
|
||||
@@ -3950,6 +4068,24 @@ snapshots:
|
||||
dependencies:
|
||||
esutils: 2.0.3
|
||||
|
||||
dom-serializer@2.0.0:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
entities: 4.5.0
|
||||
|
||||
domelementtype@2.3.0: {}
|
||||
|
||||
domhandler@5.0.3:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
|
||||
domutils@3.2.2:
|
||||
dependencies:
|
||||
dom-serializer: 2.0.0
|
||||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
|
||||
drizzle-kit@0.31.10:
|
||||
dependencies:
|
||||
'@drizzle-team/brocli': 0.10.2
|
||||
@@ -3971,11 +4107,22 @@ snapshots:
|
||||
|
||||
emoji-regex@9.2.2: {}
|
||||
|
||||
encoding-sniffer@0.2.1:
|
||||
dependencies:
|
||||
iconv-lite: 0.6.3
|
||||
whatwg-encoding: 3.1.1
|
||||
|
||||
enhanced-resolve@5.21.6:
|
||||
dependencies:
|
||||
graceful-fs: 4.2.11
|
||||
tapable: 2.3.3
|
||||
|
||||
entities@4.5.0: {}
|
||||
|
||||
entities@6.0.1: {}
|
||||
|
||||
entities@7.0.1: {}
|
||||
|
||||
es-abstract@1.24.2:
|
||||
dependencies:
|
||||
array-buffer-byte-length: 1.0.2
|
||||
@@ -4540,6 +4687,17 @@ snapshots:
|
||||
dependencies:
|
||||
hermes-estree: 0.25.1
|
||||
|
||||
htmlparser2@10.1.0:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
domutils: 3.2.2
|
||||
entities: 7.0.1
|
||||
|
||||
iconv-lite@0.6.3:
|
||||
dependencies:
|
||||
safer-buffer: 2.1.2
|
||||
|
||||
ignore@5.3.2: {}
|
||||
|
||||
ignore@7.0.5: {}
|
||||
@@ -4859,6 +5017,10 @@ snapshots:
|
||||
|
||||
node-releases@2.0.47: {}
|
||||
|
||||
nth-check@2.1.1:
|
||||
dependencies:
|
||||
boolbase: 1.0.0
|
||||
|
||||
object-assign@4.1.1: {}
|
||||
|
||||
object-inspect@1.13.4: {}
|
||||
@@ -4935,6 +5097,19 @@ snapshots:
|
||||
dependencies:
|
||||
callsites: 3.1.0
|
||||
|
||||
parse5-htmlparser2-tree-adapter@7.1.0:
|
||||
dependencies:
|
||||
domhandler: 5.0.3
|
||||
parse5: 7.3.0
|
||||
|
||||
parse5-parser-stream@7.1.2:
|
||||
dependencies:
|
||||
parse5: 7.3.0
|
||||
|
||||
parse5@7.3.0:
|
||||
dependencies:
|
||||
entities: 6.0.1
|
||||
|
||||
path-exists@4.0.0: {}
|
||||
|
||||
path-key@3.1.1: {}
|
||||
@@ -5046,6 +5221,8 @@ snapshots:
|
||||
es-errors: 1.3.0
|
||||
is-regex: 1.2.1
|
||||
|
||||
safer-buffer@2.1.2: {}
|
||||
|
||||
scheduler@0.27.0: {}
|
||||
|
||||
semver@6.3.1: {}
|
||||
@@ -5315,6 +5492,8 @@ snapshots:
|
||||
|
||||
undici-types@6.21.0: {}
|
||||
|
||||
undici@7.27.2: {}
|
||||
|
||||
unrs-resolver@1.12.2:
|
||||
dependencies:
|
||||
napi-postinstall: 0.3.4
|
||||
@@ -5354,6 +5533,12 @@ snapshots:
|
||||
|
||||
urlpattern-polyfill@10.1.0: {}
|
||||
|
||||
whatwg-encoding@3.1.1:
|
||||
dependencies:
|
||||
iconv-lite: 0.6.3
|
||||
|
||||
whatwg-mimetype@4.0.0: {}
|
||||
|
||||
which-boxed-primitive@1.1.1:
|
||||
dependencies:
|
||||
is-bigint: 1.1.0
|
||||
|
||||
Reference in New Issue
Block a user