Files
awesome/lib/indexer.js

371 lines
12 KiB
JavaScript
Raw Normal View History

2025-10-25 15:52:06 +02:00
const ora = require('ora');
const chalk = require('chalk');
const inquirer = require('inquirer');
const { nanospinner } = require('nanospinner');
const cliProgress = require('cli-progress');
const { purpleGold, pinkPurple, goldPink, sectionHeader } = require('./banner');
const github = require('./github-api');
const db = require('./db-operations');
/**
 * Extract GitHub links from the bullet lists of a markdown document.
 *
 * Every level-2 heading (`## Name`) sets the category that is attached to the
 * list items following it. A list item has the shape
 * `- [Name](url) - Description`; the description part is optional. Items may
 * be indented (nested lists) and may use any of the `-`, `*` or `+` bullet
 * markers. Links whose URL does not contain `github.com` are ignored.
 *
 * @param {string} markdown - Raw markdown text (LF or CRLF line endings).
 * @returns {Array<{name: string, url: string, description: string, category: (string|null)}>}
 *   One entry per GitHub link, in document order. `category` is `null` for
 *   links that appear before the first `##` heading. Returns an empty array
 *   when `markdown` is not a non-empty string.
 */
function parseMarkdownLinks(markdown) {
  if (typeof markdown !== 'string' || markdown.length === 0) {
    return [];
  }

  const links = [];
  let currentCategory = null;

  // Split on CRLF as well as LF: a trailing "\r" is not matched by "." and
  // would stop the end-anchored heading pattern below from ever matching.
  for (const line of markdown.split(/\r?\n/)) {
    // Category headers (## Category Name)
    const categoryMatch = line.match(/^##\s+(.+)$/);
    if (categoryMatch) {
      currentCategory = categoryMatch[1].trim();
      continue;
    }

    // List items: - [Name](url) - Description
    // Leading whitespace and the "*" / "+" markers are accepted so nested
    // entries and lists written with other bullet styles are not dropped.
    const linkMatch = line.match(/^\s*[-*+]\s+\[([^\]]+)\]\(([^)]+)\)(?:\s+-\s+(.+))?/);
    if (!linkMatch) {
      continue;
    }

    const [, name, url, description] = linkMatch;

    // Only GitHub URLs
    if (!url.includes('github.com')) {
      continue;
    }

    links.push({
      name: name.trim(),
      url: url.trim(),
      description: description ? description.trim() : '',
      category: currentCategory
    });
  }

  return links;
}
/**
 * Reduce a markdown document to plain text.
 *
 * The substitutions run strictly in the order listed below; the order matters
 * (for example, images must be unwrapped before links so that a linked badge
 * collapses to its alt text).
 *
 * @param {string} markdown - Raw markdown text.
 * @returns {string} The text with markup removed and all whitespace runs
 *   collapsed to single spaces, trimmed at both ends.
 */
function extractTextContent(markdown) {
  // [pattern, replacement] pairs, applied top to bottom.
  const substitutions = [
    [/```[\s\S]*?```/g, ''],               // fenced code blocks
    [/`[^`]+`/g, ''],                      // inline code spans
    [/!\[([^\]]*)\]\([^)]+\)/g, '$1'],     // images -> alt text
    [/\[([^\]]+)\]\([^)]+\)/g, '$1'],      // links -> link text
    [/<[^>]+>/g, ''],                      // HTML tags
    [/^#{1,6}\s+/gm, ''],                  // heading markers
    [/^(-{3,}|\*{3,}|_{3,})$/gm, ''],      // horizontal rules
    [/^[\s]*[-*+]\s+/gm, ''],              // bullet list markers
    [/^[\s]*\d+\.\s+/gm, ''],              // ordered list markers
    [/\s+/g, ' ']                          // collapse whitespace
  ];

  const plain = substitutions.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    markdown
  );

  return plain.trim();
}
/**
 * Heuristically decide whether a link points at an awesome list rather than
 * at a regular project. All comparisons are case-insensitive.
 *
 * @param {string} url - Link target.
 * @param {string} name - Link text.
 * @param {?string} description - Link description; may be empty or missing.
 * @returns {boolean} `true` when the name contains "awesome", the description
 *   contains "curated list" or "awesome list", or the URL contains "/awesome-".
 */
function isAwesomeList(url, name, description) {
  const nameText = name.toLowerCase();
  const descriptionText = (description || '').toLowerCase();
  const urlText = url.toLowerCase();

  if (nameText.includes('awesome')) {
    return true;
  }

  const descriptionHints = ['curated list', 'awesome list'];
  if (descriptionHints.some((hint) => descriptionText.includes(hint))) {
    return true;
  }

  return urlText.includes('/awesome-');
}
/**
 * Delete all indexed rows from the `readmes`, `repositories` and
 * `awesome_lists` tables, logging progress to the console.
 */
function clearIndexData() {
  console.log(chalk.gray('\nClearing existing index...'));
  // Required lazily so the database module is only loaded for a force rebuild.
  const dbInstance = require('./database').getDb();
  // NOTE(review): deletion order looks like children-before-parents; confirm
  // against the schema before reordering.
  dbInstance.exec('DELETE FROM readmes');
  dbInstance.exec('DELETE FROM repositories');
  dbInstance.exec('DELETE FROM awesome_lists');
  console.log(chalk.green('✓ Index cleared\n'));
}

/**
 * Return up to `count` randomly chosen items without modifying `items`.
 * Uses a Fisher–Yates shuffle on a copy so every item is equally likely.
 */
function pickRandomSample(items, count) {
  const pool = [...items];
  for (let i = pool.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [pool[i], pool[j]] = [pool[j], pool[i]];
  }
  return pool.slice(0, count);
}

/**
 * Build the complete index: fetch the main awesome index, parse the lists it
 * links to, then store each list, its repositories and (depending on the
 * mode) the repository READMEs in the database.
 *
 * The run is non-interactive when `process.env.CI === 'true'` or when `mode`
 * is given; in that case no prompts are shown.
 *
 * @param {boolean} [force=false] - Clear the existing index first. Asks for
 *   confirmation when running interactively.
 * @param {?string} [mode=null] - 'full', 'lists', 'sample' or 'select'.
 *   'select' falls back to 'full' when non-interactive. A missing mode in a
 *   non-interactive run defaults to 'full'.
 * @param {boolean} [incremental=true] - In 'full'/'sample' mode, only fetch a
 *   README for repositories that are new or whose `pushedAt` differs from the
 *   stored `last_commit`.
 * @returns {Promise<void>} Resolves when indexing finishes or the user cancels.
 * @throws Re-throws the error raised while fetching the main awesome index.
 */
async function buildIndex(force = false, mode = null, incremental = true) {
  console.clear();
  console.log(purpleGold('\n🚀 AWESOME INDEX BUILDER 🚀\n'));

  // Check if running in CI/non-interactive mode
  const isNonInteractive = process.env.CI === 'true' || mode !== null;

  if (force) {
    if (!isNonInteractive) {
      const { confirm } = await inquirer.prompt([
        {
          type: 'confirm',
          name: 'confirm',
          message: chalk.yellow('⚠️ Force rebuild will clear all indexed data (bookmarks will be preserved). Continue?'),
          default: false
        }
      ]);
      if (!confirm) return;
    }
    // Clear index data (keep bookmarks); no confirmation is asked in CI.
    clearIndexData();
  }

  // Fetch main awesome list
  const spinner = ora(chalk.hex('#DA22FF')('Fetching the awesome list of awesome lists...')).start();
  let mainReadme;
  try {
    mainReadme = await github.getAwesomeListsIndex();
    spinner.succeed(chalk.green('✓ Fetched main awesome index!'));
  } catch (error) {
    spinner.fail(chalk.red('✗ Failed to fetch main index'));
    console.error(chalk.red(error.message));
    throw error; // Throw instead of return so CI fails properly
  }

  // Parse links from main index
  console.log(chalk.hex('#FF69B4')('\n📝 Parsing awesome lists...'));
  const awesomeLists = parseMarkdownLinks(mainReadme);
  console.log(chalk.green(`✓ Found ${awesomeLists.length} awesome lists!\n`));

  let indexChoice = mode;
  let incrementalChoice = incremental;

  // Ask user what to index (only if interactive)
  if (!isNonInteractive) {
    const result = await inquirer.prompt([
      {
        type: 'list',
        name: 'indexChoice',
        message: 'What would you like to index?',
        choices: [
          { name: '🎯 Index everything (recommended for first run)', value: 'full' },
          { name: '📋 Index lists only (metadata, no READMEs)', value: 'lists' },
          { name: '🎲 Index a random sample (10 lists)', value: 'sample' },
          { name: '🔍 Select specific categories', value: 'select' },
          { name: '← Back', value: 'cancel' }
        ]
      },
      {
        type: 'confirm',
        name: 'incremental',
        message: 'Use incremental mode? (only update changed repos, faster)',
        default: true,
        when: (answers) => answers.indexChoice === 'full' || answers.indexChoice === 'sample'
      }
    ]);
    indexChoice = result.indexChoice;
    // The incremental question is skipped for some modes; keep the argument then.
    if (result.incremental !== undefined) {
      incrementalChoice = result.incremental;
    }
  }

  if (indexChoice === 'cancel') return;

  // Default to 'full' if no mode specified
  if (!indexChoice) indexChoice = 'full';

  console.log(chalk.cyan(`Index mode: ${indexChoice}`));
  if (indexChoice === 'full' || indexChoice === 'sample') {
    console.log(chalk.cyan(`Incremental: ${incrementalChoice ? 'Yes (skip unchanged repos)' : 'No (re-index all)'}\n`));
  } else {
    console.log('');
  }

  let listsToIndex = awesomeLists;
  if (indexChoice === 'sample') {
    // Sample from a copy: sorting with a random comparator would reorder
    // `awesomeLists` in place and does not produce a uniform shuffle.
    listsToIndex = pickRandomSample(awesomeLists, 10);
  } else if (indexChoice === 'select') {
    if (isNonInteractive) {
      console.log(chalk.yellow('Select mode not available in non-interactive mode, using full'));
      indexChoice = 'full';
    } else {
      const categories = [...new Set(awesomeLists.map(l => l.category).filter(Boolean))];
      const { selectedCategories } = await inquirer.prompt([
        {
          type: 'checkbox',
          name: 'selectedCategories',
          message: 'Select categories to index:',
          choices: categories,
          pageSize: 15
        }
      ]);
      if (selectedCategories.length === 0) {
        console.log(chalk.yellow('No categories selected'));
        return;
      }
      listsToIndex = awesomeLists.filter(l => selectedCategories.includes(l.category));
    }
  }

  // Check rate limit status before starting (best effort: failures are not fatal)
  try {
    const rateLimitInfo = await github.getRateLimitStatus();
    if (rateLimitInfo) {
      console.log(chalk.cyan('📊 GitHub API Rate Limit Status:'));
      console.log(chalk.gray(` Limit: ${rateLimitInfo.limit} requests/hour`));
      console.log(chalk.gray(` Remaining: ${rateLimitInfo.remaining}/${rateLimitInfo.limit}`));
      console.log(chalk.gray(` Used: ${rateLimitInfo.used}`));
      // NOTE(review): assumes `reset` is something `new Date()` accepts
      // (e.g. epoch milliseconds) — confirm against github-api.
      console.log(chalk.gray(` Resets at: ${new Date(rateLimitInfo.reset).toISOString()}`));
      console.log();
      if (rateLimitInfo.limit === 60) {
        console.log(chalk.yellow('⚠️ WARNING: Using unauthenticated rate limit (60/hour)'));
        console.log(chalk.yellow(' This will likely not be enough to complete indexing'));
        console.log();
      }
    }
  } catch (error) {
    console.log(chalk.gray('Could not check rate limit status, continuing...'));
  }

  console.log(pinkPurple(`\n✨ Starting index of ${listsToIndex.length} awesome lists ✨\n`));

  // Progress bars
  const multibar = new cliProgress.MultiBar({
    clearOnComplete: false,
    hideCursor: true,
    format: ' {bar} | {percentage}% | {value}/{total} | {name}'
  }, cliProgress.Presets.shades_classic);
  const listBar = multibar.create(listsToIndex.length, 0, { name: 'Lists' });
  const repoBar = multibar.create(100, 0, { name: 'Repos' });

  let totalRepos = 0;
  let indexedRepos = 0;
  let indexedReadmes = 0;
  let skipped404s = 0;
  let failedLists = 0;
  let failedRepos = 0;

  const readmesWanted = () => indexChoice === 'full' || indexChoice === 'sample';

  try {
    // Index each awesome list
    for (let i = 0; i < listsToIndex.length; i++) {
      const list = listsToIndex[i];
      listBar.update(i + 1, { name: `Lists: ${list.name.substring(0, 30)}` });

      try {
        // Add list to database
        const listId = db.addAwesomeList(list.name, list.url, list.description, list.category, 1, null);

        // Fetch list README
        const readme = await github.getReadme(list.url);
        if (!readme) continue;

        // Parse repositories from the list
        const repos = parseMarkdownLinks(readme.content);
        totalRepos += repos.length;
        repoBar.setTotal(totalRepos);

        // Index repositories
        for (const repo of repos) {
          try {
            // Look up the stored row BEFORE writing, so incremental mode can
            // compare the previous `last_commit` with the fresh `pushedAt`.
            const existingRepo = db.getRepositoryByUrl(repo.url);

            // Get repo info from GitHub
            const repoInfo = await github.getRepoInfo(repo.url);
            if (repoInfo) {
              const repoId = db.addRepository(listId, repoInfo.name, repo.url, repo.description || repoInfo.description, repoInfo);
              indexedRepos++;

              // READMEs are only fetched in 'full'/'sample' mode. Without
              // incremental mode every README is fetched; with it, only new
              // repos or repos updated since the last index.
              const shouldFetchReadme = readmesWanted() && (
                !incrementalChoice ||
                !existingRepo ||
                existingRepo.last_commit !== repoInfo.pushedAt
              );

              if (shouldFetchReadme) {
                const repoReadme = await github.getReadme(repo.url);
                if (repoReadme) {
                  const textContent = extractTextContent(repoReadme.content);
                  db.addReadme(repoId, textContent, repoReadme.content);
                  indexedReadmes++;
                }
              }
            } else {
              // Repo returned null (likely 404 - deleted/moved)
              skipped404s++;
            }

            repoBar.update(indexedRepos, { name: `Repos: ${repo.name.substring(0, 30)}` });
          } catch (error) {
            // Handle rate limit skip
            if (error.message === 'SKIP_RATE_LIMIT') {
              console.log(chalk.yellow('\n⚠ Skipping remaining items due to rate limit...'));
              break; // Exit repo loop
            }
            // Skip failed repos, but keep count so the summary can report them
            failedRepos++;
            continue;
          }
        }
      } catch (error) {
        // Skip failed lists; rate-limit skips are not counted as failures
        if (error.message !== 'SKIP_RATE_LIMIT') {
          failedLists++;
        }
        continue;
      }
    }
  } finally {
    // Always stop the bars so the hidden cursor is restored.
    multibar.stop();
  }

  // Summary
  console.log(goldPink('\n\n✨ INDEX BUILD COMPLETE! ✨\n'));
  console.log(chalk.hex('#DA22FF')('📊 Summary:'));
  console.log(chalk.gray('━'.repeat(50)));
  console.log(chalk.hex('#FF69B4')(` Awesome Lists: ${chalk.bold(listsToIndex.length)}`));
  console.log(chalk.hex('#FFD700')(` Repositories: ${chalk.bold(indexedRepos)}`));
  console.log(chalk.hex('#DA22FF')(` READMEs: ${chalk.bold(indexedReadmes)}`));
  if (skipped404s > 0) {
    console.log(chalk.hex('#9733EE')(` Skipped (404): ${chalk.bold(skipped404s)} ${chalk.gray('(deleted/moved repos)')}`));
  }
  if (failedLists > 0 || failedRepos > 0) {
    console.log(chalk.hex('#9733EE')(` Failed: ${chalk.bold(failedLists)} lists, ${chalk.bold(failedRepos)} repos ${chalk.gray('(skipped after errors)')}`));
  }
  console.log(chalk.gray('━'.repeat(50)));
  console.log();

  const stats = db.getStats();
  console.log(chalk.hex('#FF69B4')('🗄️ Total in Database:'));
  console.log(chalk.gray(` Lists: ${stats.awesomeLists} | Repos: ${stats.repositories} | READMEs: ${stats.readmes}`));
  console.log();
  console.log(chalk.green('✓ You can now search and explore! Try:\n'));
  console.log(chalk.gray(' • awesome search "your query"'));
  console.log(chalk.gray(' • awesome shell'));
  console.log(chalk.gray(' • awesome browse\n'));
}
// Public API of this module: the index builder plus the markdown helpers
// (link parsing, plain-text extraction, awesome-list heuristic).
module.exports = {
buildIndex,
parseMarkdownLinks,
extractTextContent,
isAwesomeList
};