a new start
This commit is contained in:
284
lib/indexer.js
Normal file
284
lib/indexer.js
Normal file
@@ -0,0 +1,284 @@
|
||||
const ora = require('ora');
|
||||
const chalk = require('chalk');
|
||||
const inquirer = require('inquirer');
|
||||
const { nanospinner } = require('nanospinner');
|
||||
const cliProgress = require('cli-progress');
|
||||
const { purpleGold, pinkPurple, goldPink, sectionHeader } = require('./banner');
|
||||
const github = require('./github-api');
|
||||
const db = require('./db-operations');
|
||||
|
||||
// Parse markdown to extract links
|
||||
function parseMarkdownLinks(markdown) {
|
||||
const lines = markdown.split('\n');
|
||||
const links = [];
|
||||
let currentCategory = null;
|
||||
|
||||
for (const line of lines) {
|
||||
// Category headers (## Category Name)
|
||||
const categoryMatch = line.match(/^##\s+(.+)$/);
|
||||
if (categoryMatch) {
|
||||
currentCategory = categoryMatch[1].trim();
|
||||
continue;
|
||||
}
|
||||
|
||||
// List items: - [Name](url) - Description
|
||||
const linkMatch = line.match(/^-\s+\[([^\]]+)\]\(([^)]+)\)(?:\s+-\s+(.+))?/);
|
||||
if (linkMatch) {
|
||||
const [, name, url, description] = linkMatch;
|
||||
|
||||
// Only GitHub URLs
|
||||
if (url.includes('github.com')) {
|
||||
links.push({
|
||||
name: name.trim(),
|
||||
url: url.trim(),
|
||||
description: description ? description.trim() : '',
|
||||
category: currentCategory
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return links;
|
||||
}
|
||||
|
||||
// Extract text content from markdown
|
||||
function extractTextContent(markdown) {
|
||||
let text = markdown;
|
||||
|
||||
// Remove code blocks
|
||||
text = text.replace(/```[\s\S]*?```/g, '');
|
||||
text = text.replace(/`[^`]+`/g, '');
|
||||
|
||||
// Remove images
|
||||
text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1');
|
||||
|
||||
// Remove links but keep text
|
||||
text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
|
||||
|
||||
// Remove HTML tags
|
||||
text = text.replace(/<[^>]+>/g, '');
|
||||
|
||||
// Remove markdown headers
|
||||
text = text.replace(/^#{1,6}\s+/gm, '');
|
||||
|
||||
// Remove horizontal rules
|
||||
text = text.replace(/^(-{3,}|\*{3,}|_{3,})$/gm, '');
|
||||
|
||||
// Remove list markers
|
||||
text = text.replace(/^[\s]*[-*+]\s+/gm, '');
|
||||
text = text.replace(/^[\s]*\d+\.\s+/gm, '');
|
||||
|
||||
// Normalize whitespace
|
||||
text = text.replace(/\s+/g, ' ').trim();
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
// Check if URL is an awesome list (not a regular project)
|
||||
function isAwesomeList(url, name, description) {
|
||||
const lowerName = name.toLowerCase();
|
||||
const lowerDesc = (description || '').toLowerCase();
|
||||
const urlLower = url.toLowerCase();
|
||||
|
||||
return (
|
||||
lowerName.includes('awesome') ||
|
||||
lowerDesc.includes('curated list') ||
|
||||
lowerDesc.includes('awesome list') ||
|
||||
urlLower.includes('/awesome-')
|
||||
);
|
||||
}
|
||||
|
||||
// Build the complete index
|
||||
async function buildIndex(force = false) {
|
||||
console.clear();
|
||||
console.log(purpleGold('\n🚀 AWESOME INDEX BUILDER 🚀\n'));
|
||||
|
||||
if (force) {
|
||||
const { confirm } = await inquirer.prompt([
|
||||
{
|
||||
type: 'confirm',
|
||||
name: 'confirm',
|
||||
message: chalk.yellow('⚠️ Force rebuild will clear all indexed data (bookmarks will be preserved). Continue?'),
|
||||
default: false
|
||||
}
|
||||
]);
|
||||
|
||||
if (!confirm) return;
|
||||
|
||||
// Clear index data (keep bookmarks)
|
||||
console.log(chalk.gray('\nClearing existing index...'));
|
||||
const dbInstance = require('./database').getDb();
|
||||
dbInstance.exec('DELETE FROM readmes');
|
||||
dbInstance.exec('DELETE FROM repositories');
|
||||
dbInstance.exec('DELETE FROM awesome_lists');
|
||||
console.log(chalk.green('✓ Index cleared\n'));
|
||||
}
|
||||
|
||||
// Fetch main awesome list
|
||||
const spinner = ora(chalk.hex('#DA22FF')('Fetching the awesome list of awesome lists...')).start();
|
||||
|
||||
let mainReadme;
|
||||
try {
|
||||
mainReadme = await github.getAwesomeListsIndex();
|
||||
spinner.succeed(chalk.green('✓ Fetched main awesome index!'));
|
||||
} catch (error) {
|
||||
spinner.fail(chalk.red('✗ Failed to fetch main index'));
|
||||
console.error(chalk.red(error.message));
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse links from main index
|
||||
console.log(chalk.hex('#FF69B4')('\n📝 Parsing awesome lists...'));
|
||||
const awesomeLists = parseMarkdownLinks(mainReadme);
|
||||
console.log(chalk.green(`✓ Found ${awesomeLists.length} awesome lists!\n`));
|
||||
|
||||
// Ask user what to index
|
||||
const { indexChoice } = await inquirer.prompt([
|
||||
{
|
||||
type: 'list',
|
||||
name: 'indexChoice',
|
||||
message: 'What would you like to index?',
|
||||
choices: [
|
||||
{ name: '🎯 Index everything (recommended for first run)', value: 'full' },
|
||||
{ name: '📋 Index lists only (metadata, no READMEs)', value: 'lists' },
|
||||
{ name: '🎲 Index a random sample (10 lists)', value: 'sample' },
|
||||
{ name: '🔍 Select specific categories', value: 'select' },
|
||||
{ name: '← Back', value: 'cancel' }
|
||||
]
|
||||
}
|
||||
]);
|
||||
|
||||
if (indexChoice === 'cancel') return;
|
||||
|
||||
let listsToIndex = awesomeLists;
|
||||
|
||||
if (indexChoice === 'sample') {
|
||||
listsToIndex = awesomeLists.sort(() => 0.5 - Math.random()).slice(0, 10);
|
||||
} else if (indexChoice === 'select') {
|
||||
const categories = [...new Set(awesomeLists.map(l => l.category).filter(Boolean))];
|
||||
const { selectedCategories } = await inquirer.prompt([
|
||||
{
|
||||
type: 'checkbox',
|
||||
name: 'selectedCategories',
|
||||
message: 'Select categories to index:',
|
||||
choices: categories,
|
||||
pageSize: 15
|
||||
}
|
||||
]);
|
||||
|
||||
if (selectedCategories.length === 0) {
|
||||
console.log(chalk.yellow('No categories selected'));
|
||||
return;
|
||||
}
|
||||
|
||||
listsToIndex = awesomeLists.filter(l => selectedCategories.includes(l.category));
|
||||
}
|
||||
|
||||
console.log(pinkPurple(`\n✨ Starting index of ${listsToIndex.length} awesome lists ✨\n`));
|
||||
|
||||
// Progress bars
|
||||
const multibar = new cliProgress.MultiBar({
|
||||
clearOnComplete: false,
|
||||
hideCursor: true,
|
||||
format: ' {bar} | {percentage}% | {value}/{total} | {name}'
|
||||
}, cliProgress.Presets.shades_classic);
|
||||
|
||||
const listBar = multibar.create(listsToIndex.length, 0, { name: 'Lists' });
|
||||
const repoBar = multibar.create(100, 0, { name: 'Repos' });
|
||||
|
||||
let totalRepos = 0;
|
||||
let indexedRepos = 0;
|
||||
let indexedReadmes = 0;
|
||||
let skipped404s = 0;
|
||||
|
||||
// Index each awesome list
|
||||
for (let i = 0; i < listsToIndex.length; i++) {
|
||||
const list = listsToIndex[i];
|
||||
listBar.update(i + 1, { name: `Lists: ${list.name.substring(0, 30)}` });
|
||||
|
||||
try {
|
||||
// Add list to database
|
||||
const listId = db.addAwesomeList(list.name, list.url, list.description, list.category, 1, null);
|
||||
|
||||
// Fetch list README
|
||||
const readme = await github.getReadme(list.url);
|
||||
if (!readme) continue;
|
||||
|
||||
// Parse repositories from the list
|
||||
const repos = parseMarkdownLinks(readme.content);
|
||||
totalRepos += repos.length;
|
||||
repoBar.setTotal(totalRepos);
|
||||
|
||||
// Index repositories
|
||||
for (const repo of repos) {
|
||||
try {
|
||||
// Get repo info from GitHub
|
||||
const repoInfo = await github.getRepoInfo(repo.url);
|
||||
|
||||
if (repoInfo) {
|
||||
const repoId = db.addRepository(listId, repoInfo.name, repo.url, repo.description || repoInfo.description, repoInfo);
|
||||
indexedRepos++;
|
||||
|
||||
// Index README if in full mode
|
||||
if (indexChoice === 'full' || indexChoice === 'sample') {
|
||||
const repoReadme = await github.getReadme(repo.url);
|
||||
if (repoReadme) {
|
||||
const textContent = extractTextContent(repoReadme.content);
|
||||
db.addReadme(repoId, textContent, repoReadme.content);
|
||||
indexedReadmes++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Repo returned null (likely 404 - deleted/moved)
|
||||
skipped404s++;
|
||||
}
|
||||
|
||||
repoBar.update(indexedRepos, { name: `Repos: ${repo.name.substring(0, 30)}` });
|
||||
} catch (error) {
|
||||
// Handle rate limit skip
|
||||
if (error.message === 'SKIP_RATE_LIMIT') {
|
||||
console.log(chalk.yellow('\n⚠️ Skipping remaining items due to rate limit...'));
|
||||
break; // Exit repo loop
|
||||
}
|
||||
// Skip failed repos
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// Skip failed lists
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
multibar.stop();
|
||||
|
||||
// Summary
|
||||
console.log(goldPink('\n\n✨ INDEX BUILD COMPLETE! ✨\n'));
|
||||
console.log(chalk.hex('#DA22FF')('📊 Summary:'));
|
||||
console.log(chalk.gray('━'.repeat(50)));
|
||||
console.log(chalk.hex('#FF69B4')(` Awesome Lists: ${chalk.bold(listsToIndex.length)}`));
|
||||
console.log(chalk.hex('#FFD700')(` Repositories: ${chalk.bold(indexedRepos)}`));
|
||||
console.log(chalk.hex('#DA22FF')(` READMEs: ${chalk.bold(indexedReadmes)}`));
|
||||
if (skipped404s > 0) {
|
||||
console.log(chalk.hex('#9733EE')(` Skipped (404): ${chalk.bold(skipped404s)} ${chalk.gray('(deleted/moved repos)')}`));
|
||||
}
|
||||
console.log(chalk.gray('━'.repeat(50)));
|
||||
console.log();
|
||||
|
||||
const stats = db.getStats();
|
||||
console.log(chalk.hex('#FF69B4')('🗄️ Total in Database:'));
|
||||
console.log(chalk.gray(` Lists: ${stats.awesomeLists} | Repos: ${stats.repositories} | READMEs: ${stats.readmes}`));
|
||||
console.log();
|
||||
|
||||
console.log(chalk.green('✓ You can now search and explore! Try:\n'));
|
||||
console.log(chalk.gray(' • awesome search "your query"'));
|
||||
console.log(chalk.gray(' • awesome shell'));
|
||||
console.log(chalk.gray(' • awesome browse\n'));
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
buildIndex,
|
||||
parseMarkdownLinks,
|
||||
extractTextContent,
|
||||
isAwesomeList
|
||||
};
|
||||
Reference in New Issue
Block a user