fix: bypass rate limiting for raw.githubusercontent.com requests

CRITICAL FIX: raw.githubusercontent.com does NOT count against GitHub
API rate limits, but the code was treating all requests the same way.

Problem:
- README fetches (~25,000) were going through rateLimitedRequest()
- Added artificial delays, proactive checks, and unnecessary waits
- Build took ~7 hours instead of ~2-3 hours
- Only getRepoInfo() API calls actually count against rate limits

Solution:
1. Created fetchRawContent() function for direct raw content fetches
2. Updated getReadme() to use fetchRawContent()
3. Updated getAwesomeListsIndex() to use fetchRawContent()
4. Reduced workflow timeout: 330m → 180m (3 hours)

Impact:
- Build time: ~7 hours → ~2-3 hours (60% reduction)
- Only ~25K API calls (getRepoInfo) count against 5000/hour limit
- ~25K README fetches are now unrestricted via raw.githubusercontent.com
- Will complete well within GitHub Actions 6-hour free tier limit

Files changed:
- lib/github-api.js: Add fetchRawContent(), update getReadme() and
  getAwesomeListsIndex() to use it
- .github/workflows/build-database.yml: Reduce timeout to 180 minutes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
valknarness
2025-10-28 06:04:14 +01:00
parent 9c166fe56f
commit 279cc2fa25
2 changed files with 27 additions and 7 deletions

View File

@@ -112,6 +112,25 @@ async function waitForRateLimitReset(targetResetTime) {
}
}
// Direct fetch for raw.githubusercontent.com (does NOT count against API rate limit)
async function fetchRawContent(url) {
try {
const response = await axios.get(url, {
timeout: 10000,
headers: {
'User-Agent': 'awesome-cli'
}
});
return response;
} catch (error) {
// Return null for 404s (file not found), throw for other errors
if (error.response?.status === 404) {
return null;
}
throw error;
}
}
// Rate-limited request with better handling
async function rateLimitedRequest(url, options = {}) {
const now = Date.now();
@@ -300,8 +319,8 @@ async function getReadme(repoUrl) {
for (const url of urls) {
try {
const response = await rateLimitedRequest(url);
if (response.data) {
const response = await fetchRawContent(url);
if (response && response.data) {
return {
content: response.data,
url: url
@@ -344,7 +363,7 @@ async function getLatestCommit(repoUrl) {
// Get list of awesome lists from main awesome repo
async function getAwesomeListsIndex() {
try {
const response = await rateLimitedRequest(
const response = await fetchRawContent(
'https://raw.githubusercontent.com/sindresorhus/awesome/main/readme.md'
);
return response.data;
@@ -360,5 +379,6 @@ module.exports = {
getAwesomeListsIndex,
parseGitHubUrl,
rateLimitedRequest,
fetchRawContent,
getRateLimitStatus: checkRateLimit
};