feat: github workflow

Commit 4cdcc62e15 by valknarness, 2025-10-26 13:48:23 +01:00 (parent 700c73bcbf).
9 changed files with 1605 additions and 3 deletions.

.github/workflows/build-database.yml (new file)

@@ -0,0 +1,248 @@
name: Build Awesome Database
on:
schedule:
# Run daily at 02:00 UTC
- cron: '0 2 * * *'
workflow_dispatch: # Allow manual triggering
inputs:
index_mode:
description: 'Indexing mode'
required: false
default: 'full'
type: choice
options:
- full
- sample
permissions:
contents: read
actions: write
jobs:
build-database:
runs-on: ubuntu-latest
timeout-minutes: 180 # 3 hours max
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '22'
- name: Setup pnpm
uses: pnpm/action-setup@v3
with:
version: 8
- name: Install dependencies
run: |
pnpm install
pnpm rebuild better-sqlite3
- name: Configure GitHub token for API access
run: |
chmod +x awesome
# Set GitHub token for higher rate limits (5000/hour vs 60/hour)
export GITHUB_TOKEN="${{ secrets.GITHUB_TOKEN }}"
node -e "
const db = require('./lib/database');
const dbOps = require('./lib/db-operations');
db.initialize();
dbOps.setSetting('githubToken', process.env.GITHUB_TOKEN);
db.close();
console.log('GitHub token configured');
"
- name: Build awesome database
id: build
run: |
# Capture start time
START_TIME=$(date -u +"%Y-%m-%d %H:%M:%S UTC")
echo "start_time=$START_TIME" >> $GITHUB_OUTPUT
# Determine index mode
INDEX_MODE="${{ github.event.inputs.index_mode || 'full' }}"
echo "Index mode: $INDEX_MODE"
# Build the index with automated selection
if [ "$INDEX_MODE" = "sample" ]; then
# For sample mode, we'll need to modify the script to accept input
echo "Building sample index (10 lists)..."
timeout 150m node -e "
const indexer = require('./lib/indexer');
(async () => {
try {
// Simulate user choosing 'sample' option
process.stdin.push('sample\n');
await indexer.buildIndex(false);
console.log('Sample index built successfully');
process.exit(0);
} catch (error) {
console.error('Failed to build index:', error.message);
process.exit(1);
}
})();
" || echo "Index building completed with timeout"
else
echo "Building full index..."
timeout 150m node -e "
const indexer = require('./lib/indexer');
(async () => {
try {
// Simulate user choosing 'full' option
process.stdin.push('full\n');
await indexer.buildIndex(false);
console.log('Full index built successfully');
process.exit(0);
} catch (error) {
console.error('Failed to build index:', error.message);
process.exit(1);
}
})();
" || echo "Index building completed with timeout"
fi
# Capture end time
END_TIME=$(date -u +"%Y-%m-%d %H:%M:%S UTC")
echo "end_time=$END_TIME" >> $GITHUB_OUTPUT
- name: Gather database statistics
id: stats
run: |
# Get database stats
STATS=$(node -e "
const db = require('./lib/database');
const dbOps = require('./lib/db-operations');
db.initialize();
const stats = dbOps.getIndexStats();
const dbPath = require('path').join(require('os').homedir(), '.awesome', 'awesome.db');
const fs = require('fs');
const fileSize = fs.existsSync(dbPath) ? fs.statSync(dbPath).size : 0;
const fileSizeMB = (fileSize / (1024 * 1024)).toFixed(2);
console.log(JSON.stringify({
totalLists: stats.totalLists || 0,
totalRepos: stats.totalRepositories || 0,
totalReadmes: stats.totalReadmes || 0,
sizeBytes: fileSize,
sizeMB: fileSizeMB
}));
db.close();
")
echo "Database statistics:"
echo "$STATS" | jq .
# Extract values for outputs
TOTAL_LISTS=$(echo "$STATS" | jq -r '.totalLists')
TOTAL_REPOS=$(echo "$STATS" | jq -r '.totalRepos')
TOTAL_READMES=$(echo "$STATS" | jq -r '.totalReadmes')
SIZE_MB=$(echo "$STATS" | jq -r '.sizeMB')
echo "total_lists=$TOTAL_LISTS" >> $GITHUB_OUTPUT
echo "total_repos=$TOTAL_REPOS" >> $GITHUB_OUTPUT
echo "total_readmes=$TOTAL_READMES" >> $GITHUB_OUTPUT
echo "size_mb=$SIZE_MB" >> $GITHUB_OUTPUT
- name: Prepare database artifact
run: |
# Copy database from home directory
DB_PATH="$HOME/.awesome/awesome.db"
if [ ! -f "$DB_PATH" ]; then
echo "Error: Database file not found at $DB_PATH"
exit 1
fi
# Create artifact directory
mkdir -p artifacts
# Copy database with timestamp
BUILD_DATE=$(date -u +"%Y%m%d-%H%M%S")
cp "$DB_PATH" "artifacts/awesome-${BUILD_DATE}.db"
cp "$DB_PATH" "artifacts/awesome-latest.db"
# Create metadata file
cat > artifacts/metadata.json <<EOF
{
"build_date": "$(date -u +"%Y-%m-%d %H:%M:%S UTC")",
"build_timestamp": "$(date -u +%s)",
"git_sha": "${{ github.sha }}",
"workflow_run_id": "${{ github.run_id }}",
"total_lists": ${{ steps.stats.outputs.total_lists }},
"total_repos": ${{ steps.stats.outputs.total_repos }},
"total_readmes": ${{ steps.stats.outputs.total_readmes }},
"size_mb": ${{ steps.stats.outputs.size_mb }},
"node_version": "$(node --version)",
"index_mode": "${{ github.event.inputs.index_mode || 'full' }}"
}
EOF
echo "Artifact prepared: awesome-${BUILD_DATE}.db"
ls -lh artifacts/
- name: Upload database artifact
uses: actions/upload-artifact@v4
with:
name: awesome-database-${{ github.run_id }}
path: |
artifacts/awesome-*.db
artifacts/metadata.json
retention-days: 90
compression-level: 9
- name: Create build summary
run: |
cat >> $GITHUB_STEP_SUMMARY <<EOF
# 🎉 Awesome Database Build Complete
## 📊 Statistics
| Metric | Value |
|--------|-------|
| 📚 Total Lists | ${{ steps.stats.outputs.total_lists }} |
| 📦 Total Repositories | ${{ steps.stats.outputs.total_repos }} |
| 📖 Total READMEs | ${{ steps.stats.outputs.total_readmes }} |
| 💾 Database Size | ${{ steps.stats.outputs.size_mb }} MB |
## ⏱️ Build Information
- **Started:** ${{ steps.build.outputs.start_time }}
- **Completed:** ${{ steps.build.outputs.end_time }}
- **Workflow Run:** [\#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
- **Commit:** \`${{ github.sha }}\`
- **Index Mode:** ${{ github.event.inputs.index_mode || 'full' }}
## 📥 Download Instructions
\`\`\`bash
# Using GitHub CLI
gh run download ${{ github.run_id }} -n awesome-database-${{ github.run_id }}
# Or using our helper script
curl -sSL https://raw.githubusercontent.com/${{ github.repository }}/main/scripts/download-db.sh | bash
\`\`\`
## 🔗 Artifact Link
[Download Database Artifact](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
EOF
- name: Notify on failure
if: failure()
run: |
cat >> $GITHUB_STEP_SUMMARY <<EOF
# ❌ Database Build Failed
The automated database build encountered an error.
**Workflow Run:** [\#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
Please check the logs for details.
EOF

.github/workflows/cleanup-artifacts.yml (new file)

@@ -0,0 +1,248 @@
name: Cleanup Old Artifacts
on:
schedule:
# Run daily at 03:00 UTC (after database build)
- cron: '0 3 * * *'
workflow_dispatch: # Allow manual triggering
inputs:
retention_days:
description: 'Keep artifacts newer than X days'
required: false
default: '30'
type: string
dry_run:
description: 'Dry run (list only, do not delete)'
required: false
default: false
type: boolean
permissions:
actions: write
contents: read
jobs:
cleanup:
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup GitHub CLI
run: |
# GitHub CLI is pre-installed on ubuntu-latest
gh --version
- name: List and cleanup old artifacts
env:
GH_TOKEN: ${{ github.token }}
run: |
# Configuration
RETENTION_DAYS="${{ github.event.inputs.retention_days || '30' }}"
DRY_RUN="${{ github.event.inputs.dry_run || 'false' }}"
CUTOFF_DATE=$(date -u -d "$RETENTION_DAYS days ago" +%s)
echo "🧹 Artifact Cleanup Configuration"
echo "=================================="
echo "Retention days: $RETENTION_DAYS"
echo "Cutoff date: $(date -u -d "@$CUTOFF_DATE" '+%Y-%m-%d %H:%M:%S UTC')"
echo "Dry run: $DRY_RUN"
echo ""
# Get all artifacts
echo "📦 Fetching artifacts..."
ARTIFACTS=$(gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/actions/artifacts?per_page=100" \
| jq -r '.artifacts[]')
if [ -z "$ARTIFACTS" ]; then
echo "No artifacts found"
exit 0
fi
# Initialize counters
TOTAL_COUNT=0
DELETE_COUNT=0
KEEP_COUNT=0
SPACE_FREED=0
# Create summary arrays
declare -a TO_DELETE
declare -a TO_KEEP
# Process each artifact
while IFS= read -r artifact; do
ID=$(echo "$artifact" | jq -r '.id')
NAME=$(echo "$artifact" | jq -r '.name')
SIZE=$(echo "$artifact" | jq -r '.size_in_bytes')
CREATED=$(echo "$artifact" | jq -r '.created_at')
EXPIRED=$(echo "$artifact" | jq -r '.expired')
# Convert created_at to timestamp
CREATED_TS=$(date -u -d "$CREATED" +%s)
TOTAL_COUNT=$((TOTAL_COUNT + 1))
# Check if artifact should be deleted
if [ "$EXPIRED" = "true" ] || [ $CREATED_TS -lt $CUTOFF_DATE ]; then
DELETE_COUNT=$((DELETE_COUNT + 1))
SPACE_FREED=$((SPACE_FREED + SIZE))
SIZE_MB=$(echo "scale=2; $SIZE / 1024 / 1024" | bc)
AGE_DAYS=$(echo "($(date +%s) - $CREATED_TS) / 86400" | bc)
TO_DELETE+=("| $NAME | $SIZE_MB MB | $(date -u -d "$CREATED" '+%Y-%m-%d') | $AGE_DAYS days old |")
if [ "$DRY_RUN" = "false" ]; then
echo "🗑️ Deleting: $NAME (ID: $ID, Size: $SIZE_MB MB, Age: $AGE_DAYS days)"
gh api \
--method DELETE \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/actions/artifacts/$ID" \
&& echo " ✓ Deleted" || echo " ✗ Failed to delete"
else
echo "🔍 Would delete: $NAME (ID: $ID, Size: $SIZE_MB MB, Age: $AGE_DAYS days)"
fi
else
KEEP_COUNT=$((KEEP_COUNT + 1))
SIZE_MB=$(echo "scale=2; $SIZE / 1024 / 1024" | bc)
AGE_DAYS=$(echo "($(date +%s) - $CREATED_TS) / 86400" | bc)
TO_KEEP+=("| $NAME | $SIZE_MB MB | $(date -u -d "$CREATED" '+%Y-%m-%d') | $AGE_DAYS days old |")
fi
done < <(gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/actions/artifacts?per_page=100" \
| jq -c '.artifacts[]')
# Calculate space freed in MB
SPACE_FREED_MB=$(echo "scale=2; $SPACE_FREED / 1024 / 1024" | bc)
echo ""
echo "📊 Cleanup Summary"
echo "=================="
echo "Total artifacts: $TOTAL_COUNT"
echo "Deleted/Would delete: $DELETE_COUNT"
echo "Kept: $KEEP_COUNT"
echo "Space freed: $SPACE_FREED_MB MB"
# Create GitHub Actions summary
cat >> $GITHUB_STEP_SUMMARY <<EOF
# 🧹 Artifact Cleanup Report
## Configuration
- **Retention Period:** $RETENTION_DAYS days
- **Cutoff Date:** $(date -u -d "@$CUTOFF_DATE" '+%Y-%m-%d %H:%M:%S UTC')
- **Mode:** $([ "$DRY_RUN" = "true" ] && echo "🔍 Dry Run" || echo "🗑️ Delete")
## Summary
| Metric | Value |
|--------|-------|
| 📦 Total Artifacts | $TOTAL_COUNT |
| 🗑️ Deleted | $DELETE_COUNT |
| ✅ Kept | $KEEP_COUNT |
| 💾 Space Freed | $SPACE_FREED_MB MB |
EOF
# Add deleted artifacts table if any
if [ $DELETE_COUNT -gt 0 ]; then
cat >> $GITHUB_STEP_SUMMARY <<EOF
## 🗑️ Deleted Artifacts
| Name | Size | Created | Age |
|------|------|---------|-----|
EOF
printf '%s\n' "${TO_DELETE[@]}" >> $GITHUB_STEP_SUMMARY
fi
# Add kept artifacts table if any
if [ $KEEP_COUNT -gt 0 ]; then
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ✅ Kept Artifacts (Recent)
| Name | Size | Created | Age |
|------|------|---------|-----|
EOF
# Show only first 10 kept artifacts
printf '%s\n' "${TO_KEEP[@]:0:10}" >> $GITHUB_STEP_SUMMARY
if [ $KEEP_COUNT -gt 10 ]; then
echo "" >> $GITHUB_STEP_SUMMARY
echo "*...and $((KEEP_COUNT - 10)) more*" >> $GITHUB_STEP_SUMMARY
fi
fi
cat >> $GITHUB_STEP_SUMMARY <<EOF
## 📋 Next Scheduled Run
This workflow runs daily at 03:00 UTC to maintain artifact storage.
**Manual trigger:** You can also run this workflow manually from the Actions tab with custom retention settings.
EOF
- name: Cleanup workflow runs
if: github.event.inputs.dry_run != 'true'
env:
GH_TOKEN: ${{ github.token }}
run: |
echo ""
echo "🧹 Cleaning up old workflow runs..."
# Keep only last 50 workflow runs
RUNS_TO_DELETE=$(gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/actions/runs?per_page=100" \
| jq -r '.workflow_runs[50:] | .[].id')
if [ -z "$RUNS_TO_DELETE" ]; then
echo "No old workflow runs to delete"
else
DELETE_COUNT=0
for RUN_ID in $RUNS_TO_DELETE; do
# Only delete runs older than 30 days
RUN_DATE=$(gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/actions/runs/$RUN_ID" \
| jq -r '.created_at')
RUN_TS=$(date -u -d "$RUN_DATE" +%s)
CUTOFF=$(date -u -d "30 days ago" +%s)
if [ $RUN_TS -lt $CUTOFF ]; then
echo "Deleting workflow run: $RUN_ID"
gh api \
--method DELETE \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/actions/runs/$RUN_ID" \
&& DELETE_COUNT=$((DELETE_COUNT + 1)) || echo "Failed to delete run $RUN_ID"
fi
done
echo "Deleted $DELETE_COUNT old workflow runs"
fi
- name: Report failures
if: failure()
run: |
cat >> $GITHUB_STEP_SUMMARY <<EOF
# ❌ Cleanup Failed
The artifact cleanup workflow encountered an error.
Please check the logs for details.
EOF

CLAUDE.md (new file)

@@ -0,0 +1,151 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
AWESOME is a full-featured CLI application for exploring, curating, and managing awesome lists from GitHub. It provides an interactive terminal interface with search, bookmarking, custom list creation, and export capabilities.
## Core Technologies
- **Node.js 22+** - Required runtime
- **better-sqlite3** - Embedded database with FTS5 full-text search
- **commander.js** - CLI framework and command routing
- **inquirer.js** - Interactive prompts and menus
- **marked + marked-terminal** - Terminal markdown rendering
- **axios** - GitHub API client with OAuth support
## Commands
### Development
```bash
pnpm install # Install dependencies
pnpm rebuild better-sqlite3 # Rebuild native module
chmod +x awesome # Make executable
node awesome # Run application
node --inspect=9230 awesome # Debug mode
```
### Key Application Commands
```bash
./awesome # Interactive menu (default)
./awesome db # Download pre-built database from GitHub Actions
./awesome index # Build/rebuild index locally (first run required)
./awesome index -f # Force rebuild (clears data)
./awesome search "query" # Quick search
./awesome shell # Interactive shell
./awesome settings # Configure GitHub OAuth
```
## Architecture
### Entry Point
- `awesome` - Main executable that initializes database and sets up Commander routes
### Data Flow
1. **Indexing** (`lib/indexer.js`) - Fetches sindresorhus/awesome, recursively crawls awesome lists
2. **Storage** (`lib/database.js`) - schema creation and connection lifecycle; (`lib/db-operations.js`) - CRUD operations
3. **Search** (`lib/search.js`) - FTS5 queries across indexed READMEs
4. **Display** (`lib/viewer.js`) - Markdown rendering with pagination
### Database Schema
Located at `~/.awesome/awesome.db` with these key tables:
- `awesome_lists` - Hierarchical list storage (with parent_id foreign key)
- `repositories` - Individual projects with GitHub metadata
- `readmes` - Full README content with version hashing
- `readmes_fts` - FTS5 virtual table for full-text search
- `bookmarks` - User favorites with tags/categories
- `custom_lists` + `custom_list_items` - User-curated lists
- `settings` - App configuration including GitHub tokens
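For orientation, a minimal sketch of how the content table and its FTS5 companion might be declared with better-sqlite3 (column names are assumed from the overview above, not copied from `lib/database.js`):
```js
// Sketch only: assumed column names; the real schema lives in lib/database.js.
const Database = require('better-sqlite3');
const db = new Database('/tmp/awesome-demo.db');

db.exec(`
  CREATE TABLE IF NOT EXISTS readmes (
    id INTEGER PRIMARY KEY,
    repository_id INTEGER UNIQUE,
    content TEXT,
    content_hash TEXT
  );
  -- External-content FTS5 table: indexes readmes.content without duplicating it
  CREATE VIRTUAL TABLE IF NOT EXISTS readmes_fts
    USING fts5(content, content='readmes', content_rowid='id');
`);
db.close();
```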
### Module Responsibilities
**Core Operations:**
- `lib/database.js` - Schema creation, connection management, lifecycle
- `lib/db-operations.js` - All SQL queries and data operations
- `lib/github-api.js` - GitHub API wrapper with rate limiting
- `lib/github-oauth.js` - Device flow OAuth authentication
**Features:**
- `lib/indexer.js` - Recursive crawler for awesome lists, parses markdown links
- `lib/search.js` - FTS5 search interface with interactive result selection
- `lib/viewer.js` - Paginated README viewer with annotations
- `lib/bookmarks.js` - Bookmark management (add/remove/tag/export)
- `lib/custom-lists.js` - User list creation and export
- `lib/browser.js` - Hierarchical list navigation
- `lib/shell.js` - Command shell with history (~/.awesome/shell_history.txt)
- `lib/random.js` - Random README discovery
- `lib/history.js` - Reading activity tracking
- `lib/stats.js` - Index statistics dashboard
- `lib/checkout.js` - Git clone integration
- `lib/settings.js` - Configuration management
- `lib/db-download.js` - Download pre-built databases from GitHub Actions artifacts
**UI:**
- `lib/menu.js` - Main interactive menu
- `lib/banner.js` - Gradient color scheme (purple/pink/gold theme), headers
### GitHub API Integration
**Rate Limiting:**
- Unauthenticated: 60 requests/hour
- OAuth authenticated: 5,000 requests/hour
- Rate limit handler in `lib/github-api.js` prompts user to wait/skip/abort
- OAuth setup via device flow in `lib/github-oauth.js`
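A hedged sketch of that header check (the `x-ratelimit-*` headers are GitHub's documented names; the error shape and prompt flow in `lib/github-api.js` are assumptions):
```js
// Sketch: detect rate-limit exhaustion from GitHub's response headers.
// The wait/skip/abort prompt described above is stubbed out here.
const axios = require('axios');

async function githubGet(url, token) {
  const res = await axios.get(url, {
    headers: token ? { Authorization: `Bearer ${token}` } : {},
    validateStatus: () => true, // handle 403 ourselves instead of throwing
  });
  if (res.status === 403 && Number(res.headers['x-ratelimit-remaining']) === 0) {
    const resetAt = new Date(Number(res.headers['x-ratelimit-reset']) * 1000);
    // lib/github-api.js would prompt the user here (wait/skip/abort)
    throw new Error(`Rate limited until ${resetAt.toISOString()}`);
  }
  return res.data;
}
```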
**Indexing Strategy:**
- Parses markdown with regex: `- [Name](url) - Description`
- Detects awesome lists vs regular repos by name/description patterns
- Recursive crawling with level tracking for hierarchy
- Stores raw and processed content for diff-based updates
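A sketch of that parsing step (the exact regex and list-detection heuristic in `lib/indexer.js` are assumed, not quoted):
```js
// Sketch: extract "- [Name](url) - Description" entries from README markdown.
const LINK_RE = /^[-*]\s+\[([^\]]+)\]\((https?:\/\/[^)\s]+)\)(?:\s*[-–—]\s*(.*))?$/;

function parseAwesomeLinks(markdown) {
  const entries = [];
  for (const line of markdown.split('\n')) {
    const m = line.trim().match(LINK_RE);
    if (!m) continue;
    const [, name, url, description = ''] = m;
    // Assumed heuristic: "awesome" in the name flags a nested awesome list
    entries.push({ name, url, description, isAwesomeList: /awesome/i.test(name) });
  }
  return entries;
}

console.log(parseAwesomeLinks('- [Awesome Go](https://github.com/avelino/awesome-go) - Curated Go frameworks'));
```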
### Search Architecture
**FTS5 Implementation:**
- Indexes: repository name, description, content, tags, categories
- Content preprocessing in `lib/indexer.js`: strips code blocks, HTML, normalizes whitespace
- Query through `db-operations.js:searchReadmes()` using MATCH operator
- Results ranked by BM25 relevance score
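In better-sqlite3 terms, the query likely resembles the following (a sketch; the actual SQL and column names in `db-operations.js` may differ):
```js
// Sketch: FTS5 MATCH query ranked by BM25 (lower bm25() score = better match).
const path = require('path');
const os = require('os');
const Database = require('better-sqlite3');

const db = new Database(path.join(os.homedir(), '.awesome', 'awesome.db'), { readonly: true });
const rows = db.prepare(`
  SELECT rowid, bm25(readmes_fts) AS score
  FROM readmes_fts
  WHERE readmes_fts MATCH ?
  ORDER BY score
  LIMIT 20
`).all('"static site" OR jamstack');
console.log(rows);
db.close();
```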
### Export Capabilities
- Markdown with awesome-style badges
- JSON structured data
- PDF/EPUB via `markdown-pdf` and `epub-gen` (dependencies installed)
## Development Patterns
### Error Handling
- GitHub API errors display user-friendly prompts with wait/skip/abort options
- Rate limit detection via response headers (`x-ratelimit-remaining`)
- Special error code `'SKIP_RATE_LIMIT'` to skip remaining items
### Database Operations
- Uses prepared statements exclusively
- Foreign keys enabled with cascade deletes
- Content versioning via SHA256 hashing
- WAL mode for concurrent access
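A sketch of the hash-gated write implied by "content versioning via SHA256" (assumes `repository_id` is unique in `readmes`; the real helpers live in `lib/db-operations.js`):
```js
// Sketch: skip the write when the README's SHA256 hash is unchanged.
const crypto = require('crypto');

function upsertReadme(db, repositoryId, content) {
  const hash = crypto.createHash('sha256').update(content, 'utf8').digest('hex');
  const row = db
    .prepare('SELECT content_hash FROM readmes WHERE repository_id = ?')
    .get(repositoryId);
  if (row && row.content_hash === hash) return false; // unchanged, no write
  db.prepare(`
    INSERT INTO readmes (repository_id, content, content_hash)
    VALUES (?, ?, ?)
    ON CONFLICT(repository_id) DO UPDATE
      SET content = excluded.content, content_hash = excluded.content_hash
  `).run(repositoryId, content, hash);
  return true; // inserted or updated
}
```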
### UI Patterns
- Consistent gradient theme via `banner.js` color functions
- Loading states using `ora` and `nanospinner`
- Progress bars via `cli-progress` for batch operations
- Tables with `cli-table3` for result display
### State Management
- Database is single source of truth
- Shell history persisted to `~/.awesome/shell_history.txt`
- Settings stored in database `settings` table as key-value pairs
- OAuth tokens encrypted and stored in settings
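The key-value settings table described above maps naturally onto two tiny helpers; a sketch (column names assumed):
```js
// Sketch: key-value settings backed by the settings table.
function setSetting(db, key, value) {
  db.prepare(`
    INSERT INTO settings (key, value) VALUES (?, ?)
    ON CONFLICT(key) DO UPDATE SET value = excluded.value
  `).run(key, JSON.stringify(value));
}

function getSetting(db, key, fallback = null) {
  const row = db.prepare('SELECT value FROM settings WHERE key = ?').get(key);
  return row ? JSON.parse(row.value) : fallback;
}
```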
## Important Notes
- **First Run:** Two options:
- Fast: `./awesome db` to download pre-built database from GitHub Actions
- Slow: `./awesome index` to build the search index locally (1-2 hours)
- **Native Module:** better-sqlite3 requires rebuild after installation
- **OAuth Recommended:** Dramatically increases API rate limits (see `OAUTH_SETUP.md`)
- **Data Location:** All data in `~/.awesome/` directory
- **Hierarchical Structure:** awesome_lists table uses parent_id for tree relationships
- **Content Hashing:** README versions tracked by SHA256 to enable smart updates with diffs
- **GitHub Actions:** Automated workflows build database daily and clean up old artifacts
- **Artifact Download:** CLI command `./awesome db` provides interactive database download


@@ -11,7 +11,31 @@ chmod +x awesome
## First Run
1. **Build the Index** (required first step)
### Option 1: Download Pre-Built Database (Fast! ⚡)
1. **Download Database** (takes ~1 minute)
```bash
./awesome db
```
Or use the script:
```bash
./scripts/download-db.sh
```
This will:
- Show available database builds
- Let you select one to download
- Automatically install it
- Backup your existing database (if any)
2. **Start Exploring**
```bash
./awesome
```
Opens the beautiful interactive menu with all features!
### Option 2: Build Index Locally (Slow - 1-2 hours)
1. **Build the Index** (takes 1-2 hours)
```bash
./awesome index
```
@@ -61,7 +85,8 @@ awesome> help
| Command | Description |
|---------|-------------|
| `./awesome` | Interactive menu (recommended) |
| `./awesome index` | Build/rebuild index |
| `./awesome db` | Download pre-built database ⚡ |
| `./awesome index` | Build/rebuild index locally |
| `./awesome search "query"` | Quick search |
| `./awesome shell` | Interactive shell |
| `./awesome browse` | Browse awesome lists |

README.md

@@ -38,11 +38,69 @@ Beautiful purple, pink, and gold gradient color scheme throughout the entire app
## 📦 Installation
### Option 1: Use Pre-Built Database (Recommended) ⚡
Skip the lengthy indexing process! Download a pre-built database that's automatically updated daily.
```bash
# Clone the repository
git clone https://github.com/YOUR_USERNAME/awesome.git
cd awesome
# Install dependencies
pnpm install
pnpm rebuild better-sqlite3
chmod +x awesome
# Download pre-built database (easiest - uses GitHub CLI)
./awesome db
# Or use the standalone script
./scripts/download-db.sh
# Start using immediately!
./awesome
```
**Database is rebuilt daily** by GitHub Actions with full indexing of all awesome lists!
**Two ways to download:**
- `./awesome db` - Built-in command with interactive menu
- `./scripts/download-db.sh` - Standalone script with more options
#### Download Database Manually
If you prefer manual download or the script doesn't work:
```bash
# Install GitHub CLI if needed
# macOS: brew install gh
# Ubuntu: sudo apt install gh
# Windows: winget install GitHub.cli
# Authenticate with GitHub
gh auth login
# Download the database artifact from a recent successful build
gh run list --repo YOUR_USERNAME/awesome --workflow=build-database.yml
gh run download <run-id> --repo YOUR_USERNAME/awesome -n awesome-database-<run-id>
# Move to correct location
mkdir -p ~/.awesome
cp awesome-*.db ~/.awesome/awesome.db
```
### Option 2: Build Database Locally 🔨
Build the index yourself (takes 1-2 hours for full indexing):
```bash
git clone https://github.com/YOUR_USERNAME/awesome.git
cd awesome
pnpm install
pnpm rebuild better-sqlite3
chmod +x awesome
# Build the index
./awesome index
```
## ⚡ GitHub Rate Limits - SOLVED with OAuth! 🔐
@@ -86,7 +144,10 @@ See [OAUTH_SETUP.md](OAUTH_SETUP.md) for complete guide!
### Commands
```bash
# Build the index (run this first!)
# Download pre-built database (fast!)
./awesome db
# Build the index locally (slow - 1-2 hours)
./awesome index
# Search
@@ -164,6 +225,69 @@ The application uses SQLite3 with FTS5 for full-text search. Data is stored in `
- **Ora & Nanospinner** - Loading animations
- **pnpm** - Fast, efficient package manager
## 🤖 Automated Database Builds
The repository includes GitHub Actions workflows for automated database management:
### Daily Database Build
**Schedule:** Runs daily at 02:00 UTC
**What it does:**
- Fetches all awesome lists from [sindresorhus/awesome](https://github.com/sindresorhus/awesome)
- Recursively indexes all README files
- Collects GitHub metadata (stars, forks, etc.)
- Compresses and uploads database as artifact
- Generates build report with statistics
**Manual Trigger:**
You can manually trigger a database build from the GitHub Actions tab:
```bash
gh workflow run build-database.yml -f index_mode=full
```
**Artifact Details:**
- **Retention:** 90 days
- **Size:** ~50-200MB (compressed)
- **Contains:** Full database + metadata JSON
- **Naming:** `awesome-database-{run_id}`
### Artifact Cleanup
**Schedule:** Runs daily at 03:00 UTC (after database build)
**What it does:**
- Removes artifacts older than 30 days (configurable)
- Cleans up old workflow runs
- Generates cleanup report
- Dry-run mode available for testing
**Manual Trigger:**
```bash
# Standard cleanup (30 days retention)
gh workflow run cleanup-artifacts.yml
# Custom retention period
gh workflow run cleanup-artifacts.yml -f retention_days=60
# Dry run (preview only)
gh workflow run cleanup-artifacts.yml -f dry_run=true
```
### Download Helper Script
The `scripts/download-db.sh` script provides an interactive interface to:
- List available database builds
- View build metadata (date, size, commit)
- Download and install selected database
- Backup existing database automatically
**Features:**
- Interactive selection menu
- Automatic backup of existing databases
- GitHub CLI integration
- Cross-platform support (Linux, macOS, Windows/Git Bash)
## 📝 License
MIT

WORKFLOWS.md (new file)

@@ -0,0 +1,428 @@
# GitHub Actions Workflows
This document describes the automated workflows for building and managing the Awesome database.
## Overview
Two workflows automate database management:
1. **Build Database** - Creates a fresh database daily
2. **Cleanup Artifacts** - Removes old artifacts to save storage
## Build Database Workflow
**File:** `.github/workflows/build-database.yml`
### Schedule
- **Automatic:** Daily at 02:00 UTC
- **Manual:** Can be triggered via GitHub Actions UI or CLI
### Features
#### Automatic Daily Builds
- Fetches [sindresorhus/awesome](https://github.com/sindresorhus/awesome)
- Recursively indexes all awesome lists
- Collects GitHub metadata (stars, forks, last commit)
- Generates full-text search index
- Compresses and uploads as artifact
#### Build Modes
**Full Mode** (default):
- Indexes all awesome lists
- Takes ~2-3 hours
- Produces comprehensive database
**Sample Mode**:
- Indexes random sample of 10 lists
- Takes ~5-10 minutes
- Good for testing
#### GitHub Token Integration
- Uses `GITHUB_TOKEN` secret for API access
- Provides 5,000 requests/hour (vs 60 without auth)
- Automatically configured during build
### Manual Triggering
#### Via GitHub CLI
```bash
# Trigger full build
gh workflow run build-database.yml -f index_mode=full
# Trigger sample build (for testing)
gh workflow run build-database.yml -f index_mode=sample
# Check workflow status
gh run list --workflow=build-database.yml
# View specific run
gh run view <run-id>
```
#### Via GitHub UI
1. Go to repository → Actions tab
2. Select "Build Awesome Database" workflow
3. Click "Run workflow" button
4. Choose index mode (full/sample)
5. Click "Run workflow"
### Outputs
#### Artifacts Uploaded
- `awesome-{timestamp}.db` - Timestamped database file
- `awesome-latest.db` - Copy of the newest build under a stable name
- `metadata.json` - Build information
**Artifact Naming:** `awesome-database-{run_id}`
**Retention:** 90 days
#### Metadata Structure
```json
{
"build_date": "2025-10-26 02:15:43 UTC",
"build_timestamp": 1730000143,
"git_sha": "abc123...",
"workflow_run_id": "12345678",
"total_lists": 450,
"total_repos": 15000,
"total_readmes": 12500,
"size_mb": 156.42,
"node_version": "v22.0.0",
"index_mode": "full"
}
```
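Because these fields are stable, a downloader can sanity-check an artifact before installing it; a minimal Node sketch using only the documented fields:
```js
// Sketch: summarize a downloaded metadata.json (field names from the structure above).
const fs = require('fs');

const meta = JSON.parse(fs.readFileSync('metadata.json', 'utf8'));
console.log(
  `Build ${meta.git_sha.slice(0, 7)} at ${meta.build_date}: ` +
  `${meta.total_repos} repos, ${meta.size_mb} MB (${meta.index_mode} index)`
);
```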
#### Build Summary
Each run generates a summary with:
- Statistics (lists, repos, READMEs, size)
- Build timing information
- Download instructions
- Direct artifact link
### Monitoring
#### Check Recent Runs
```bash
# List last 10 runs
gh run list --workflow=build-database.yml --limit 10
# Show only failed runs
gh run list --workflow=build-database.yml --status failure
# Watch current run
gh run watch
```
#### View Build Logs
```bash
# Show logs for specific run
gh run view <run-id> --log
# Show only failed steps
gh run view <run-id> --log-failed
```
## Cleanup Artifacts Workflow
**File:** `.github/workflows/cleanup-artifacts.yml`
### Schedule
- **Automatic:** Daily at 03:00 UTC (after database build)
- **Manual:** Can be triggered with custom settings
### Features
#### Automatic Cleanup
- Removes artifacts older than 30 days (default)
- Cleans up old workflow runs (>30 days, keeping last 50)
- Generates detailed cleanup report
- Dry-run mode available
#### Configurable Retention
- Default: 30 days
- Can be customized per run
- Artifacts within retention period are preserved
### Manual Triggering
#### Via GitHub CLI
```bash
# Standard cleanup (30 days)
gh workflow run cleanup-artifacts.yml
# Custom retention period (60 days)
gh workflow run cleanup-artifacts.yml -f retention_days=60
# Dry run (preview only, no deletions)
gh workflow run cleanup-artifacts.yml -f dry_run=true -f retention_days=30
# Aggressive cleanup (7 days)
gh workflow run cleanup-artifacts.yml -f retention_days=7
```
#### Via GitHub UI
1. Go to repository → Actions tab
2. Select "Cleanup Old Artifacts" workflow
3. Click "Run workflow" button
4. Configure options:
- **retention_days**: Days to keep (default: 30)
- **dry_run**: Preview mode (default: false)
5. Click "Run workflow"
### Cleanup Report
Each run generates a detailed report showing:
#### Summary Statistics
- Total artifacts scanned
- Number deleted
- Number kept
- Storage space freed (MB)
#### Deleted Artifacts Table
- Artifact name
- Size
- Creation date
- Age (in days)
#### Kept Artifacts Table
- Recently created artifacts
- Artifacts within retention period
- Limited to first 10 for brevity
### Storage Management
#### Checking Storage Usage
```bash
# List all artifacts with sizes
gh api repos/{owner}/{repo}/actions/artifacts \
| jq -r '.artifacts[] | "\(.name) - \(.size_in_bytes / 1024 / 1024 | floor)MB - \(.created_at)"'
# Calculate total storage
gh api repos/{owner}/{repo}/actions/artifacts \
| jq '[.artifacts[].size_in_bytes] | add / 1024 / 1024 | floor'
```
#### Retention Strategy
**Recommended settings:**
- **Production:** 30-60 days retention
- **Development:** 14-30 days retention
- **Testing:** 7-14 days retention
**Storage limits:**
- GitHub Free: 500 MB of included Actions storage for private repos (artifacts in public repos are not billed)
- GitHub Pro: 1 GB included
- GitHub Team / Enterprise: 2 GB / 50 GB included
## Downloading Databases
### Method 1: Interactive Script (Recommended)
```bash
./scripts/download-db.sh
```
**Features:**
- Lists all available builds
- Shows metadata (date, size, commit)
- Interactive selection
- Automatic backup of existing database
- Progress indication
**Usage:**
```bash
# Interactive mode
./scripts/download-db.sh
# Specify repository
./scripts/download-db.sh --repo owner/awesome
# Download latest automatically
./scripts/download-db.sh --repo owner/awesome --latest
```
### Method 2: GitHub CLI Direct
```bash
# List available artifacts
gh api repos/OWNER/REPO/actions/artifacts | jq -r '.artifacts[].name'
# Download specific run
gh run download <run-id> -n awesome-database-<run-id>
# Extract and install
mkdir -p ~/.awesome
cp awesome-*.db ~/.awesome/awesome.db
```
### Method 3: GitHub API
```bash
# Get latest successful run
RUN_ID=$(gh api "repos/OWNER/REPO/actions/workflows/build-database.yml/runs?status=success" \
| jq -r '.workflow_runs[0].id')
# Download artifact
gh run download $RUN_ID -n awesome-database-$RUN_ID
```
## Troubleshooting
### Build Failures
**Problem:** Workflow fails during indexing
**Solutions:**
1. Check API rate limits
2. Review build logs: `gh run view <run-id> --log-failed`
3. Try sample mode for testing
4. Check GitHub status page
**Common Issues:**
- GitHub API rate limiting
- Network timeouts
- Invalid awesome list URLs
### Download Issues
**Problem:** Cannot download artifacts
**Solutions:**
1. Ensure GitHub CLI is authenticated: `gh auth status`
2. Check artifact exists: `gh run list --workflow=build-database.yml`
3. Verify artifact hasn't expired (90 days)
4. Try alternative download method
### Storage Issues
**Problem:** Running out of artifact storage
**Solutions:**
1. Reduce retention period: `gh workflow run cleanup-artifacts.yml -f retention_days=14`
2. Run manual cleanup: `gh workflow run cleanup-artifacts.yml`
3. Check current usage with GitHub API
4. Consider upgrading GitHub plan
### Permission Issues
**Problem:** Workflow lacks permissions
**Solutions:**
1. Verify `GITHUB_TOKEN` has required scopes
2. Check workflow permissions in `.yml` file
3. Review repository settings → Actions → General
## Best Practices
### For Maintainers
1. **Monitor Build Success Rate**
- Set up notifications for failed builds
- Review logs regularly
- Keep dependencies updated
2. **Optimize Build Times**
- Use sample mode for development
- Cache dependencies when possible
- Monitor for slow API responses
3. **Manage Storage**
- Run cleanups regularly
- Adjust retention based on usage
- Archive important builds
4. **Documentation**
- Keep artifact metadata updated
- Document any custom configurations
- Update README with changes
### For Users
1. **Download Strategy**
- Use latest builds for current data
- Check metadata before downloading
- Keep local backup of preferred versions
2. **Update Frequency**
- Daily builds provide fresh data
- Weekly downloads usually sufficient
- On-demand for specific needs
3. **Storage Management**
- Clean old local databases
- Use compression for backups
- Verify database integrity after download
## Advanced Usage
### Custom Build Scripts
You can create custom workflows based on the provided templates. Note that calling a workflow with `uses:` requires the target workflow to declare an `on: workflow_call` trigger (with a matching `index_mode` input), which you would need to add to `build-database.yml`:
```yaml
# Example: Weekly comprehensive build
name: Weekly Full Index
on:
schedule:
- cron: '0 0 * * 0' # Sundays at midnight
workflow_dispatch:
jobs:
build:
uses: ./.github/workflows/build-database.yml
with:
index_mode: full
```
### Notification Integration
Add notifications to workflow:
```yaml
- name: Notify on completion
if: always()
run: |
# Send to Slack, Discord, email, etc.
curl -X POST $WEBHOOK_URL -d "Build completed: ${{ job.status }}"
```
### Multi-Platform Builds
Extend workflow for different platforms:
```yaml
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
node-version: [22, 20, 18]
```
## Resources
- [GitHub Actions Documentation](https://docs.github.com/en/actions)
- [GitHub CLI Manual](https://cli.github.com/manual/)
- [Artifact Storage Limits](https://docs.github.com/en/billing/managing-billing-for-github-actions/about-billing-for-github-actions)
- [Workflow Syntax](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions)
## Support
For issues or questions:
1. Check this documentation
2. Review workflow logs
3. Open an issue in the repository
4. Consult GitHub Actions documentation

awesome (CLI entry point)

@@ -110,6 +110,14 @@ program
await checkout.cloneRepository(repo, options.directory);
});
program
.command('db')
.description('Download pre-built database from GitHub Actions')
.action(async () => {
const dbDownload = require('./lib/db-download');
await dbDownload.manage();
});
// If no command is provided, show the main menu
if (process.argv.length === 2) {
(async () => {

lib/db-download.js (new file)

@@ -0,0 +1,364 @@
const { execSync, spawn } = require('child_process');
const fs = require('fs');
const path = require('path');
const os = require('os');
const inquirer = require('inquirer');
const chalk = require('chalk');
const ora = require('ora');
const { purpleGold, pinkPurple, goldPink, sectionHeader } = require('./banner');
const DB_DIR = path.join(os.homedir(), '.awesome');
const DB_FILE = path.join(DB_DIR, 'awesome.db');
// Check if GitHub CLI is installed
function checkGhCli() {
try {
execSync('gh --version', { stdio: 'ignore' });
return true;
} catch {
return false;
}
}
// Check if authenticated with GitHub CLI
function checkGhAuth() {
try {
execSync('gh auth status', { stdio: 'ignore' });
return true;
} catch {
return false;
}
}
// Get repository from git remote
function getRepository() {
try {
const remote = execSync('git remote get-url origin', { encoding: 'utf-8' }).trim();
const match = remote.match(/github\.com[:/]([^/]+\/[^/]+?)(\.git)?$/);
if (match) {
return match[1];
}
} catch {
// Not a git repository or no remote
}
return null;
}
// Fetch workflow runs
async function fetchWorkflowRuns(repo, limit = 10) {
try {
const output = execSync(
`gh api -H "Accept: application/vnd.github+json" "/repos/${repo}/actions/workflows/build-database.yml/runs?per_page=${limit}&status=success"`,
{ encoding: 'utf-8' }
);
const data = JSON.parse(output);
return data.workflow_runs || [];
} catch (error) {
throw new Error(`Failed to fetch workflow runs: ${error.message}`);
}
}
// Fetch artifacts for a run
async function fetchArtifacts(repo, runId) {
try {
const output = execSync(
`gh api -H "Accept: application/vnd.github+json" "/repos/${repo}/actions/runs/${runId}/artifacts"`,
{ encoding: 'utf-8' }
);
const data = JSON.parse(output);
return data.artifacts || [];
} catch (error) {
throw new Error(`Failed to fetch artifacts: ${error.message}`);
}
}
// Format date
function formatDate(dateString) {
const date = new Date(dateString);
return date.toLocaleString('en-US', {
year: 'numeric',
month: 'short',
day: '2-digit',
hour: '2-digit',
minute: '2-digit'
});
}
// Format size
function formatSize(bytes) {
const mb = bytes / (1024 * 1024);
return `${mb.toFixed(1)} MB`;
}
// List available databases
async function listDatabases(repo) {
const spinner = ora(chalk.hex('#DA22FF')('Fetching available databases...')).start();
try {
const runs = await fetchWorkflowRuns(repo, 10);
if (runs.length === 0) {
spinner.fail(chalk.red('No database builds found'));
return null;
}
// Fetch artifacts for each run
const runsWithArtifacts = [];
for (const run of runs) {
const artifacts = await fetchArtifacts(repo, run.id);
const dbArtifact = artifacts.find(a => a.name.startsWith('awesome-database'));
if (dbArtifact) {
runsWithArtifacts.push({
runId: run.id,
createdAt: run.created_at,
sha: run.head_sha.substring(0, 7),
artifact: dbArtifact
});
}
}
spinner.succeed(chalk.green(`Found ${runsWithArtifacts.length} available databases`));
if (runsWithArtifacts.length === 0) {
return null;
}
return runsWithArtifacts;
} catch (error) {
spinner.fail(chalk.red(error.message));
return null;
}
}
// Download and install database
async function downloadDatabase(repo, runId, artifactName) {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'awesome-db-'));
try {
// Download artifact
const spinner = ora(chalk.hex('#FF69B4')('Downloading database...')).start();
const downloadProcess = spawn('gh', ['run', 'download', String(runId), '-R', repo, '-D', tempDir], {
stdio: 'pipe'
});
await new Promise((resolve, reject) => {
downloadProcess.on('close', (code) => {
if (code === 0) {
resolve();
} else {
reject(new Error(`Download failed with code ${code}`));
}
});
downloadProcess.on('error', reject);
});
spinner.succeed(chalk.green('Downloaded successfully'));
// Find database file
const files = fs.readdirSync(tempDir, { recursive: true, withFileTypes: true });
const dbFile = files.find(f => f.isFile() && f.name.endsWith('.db'));
if (!dbFile) {
throw new Error('Database file not found in artifact');
}
const dbPath = path.join(dbFile.path || tempDir, dbFile.name);
// Backup existing database
if (fs.existsSync(DB_FILE)) {
const backupFile = `${DB_FILE}.backup.${Date.now()}`;
console.log(chalk.yellow(`\n⚠️ Backing up existing database to:`));
console.log(chalk.gray(` ${backupFile}`));
fs.copyFileSync(DB_FILE, backupFile);
}
// Create directory if needed
if (!fs.existsSync(DB_DIR)) {
fs.mkdirSync(DB_DIR, { recursive: true });
}
// Copy database
fs.copyFileSync(dbPath, DB_FILE);
const size = fs.statSync(DB_FILE).size;
console.log(chalk.green(`\n✓ Database installed successfully!`));
console.log(chalk.gray(` Location: ${DB_FILE}`));
console.log(chalk.gray(` Size: ${formatSize(size)}`));
// Show metadata if available
const metadataFile = files.find(f => f.isFile() && f.name === 'metadata.json');
if (metadataFile) {
const metadataPath = path.join(metadataFile.path || tempDir, metadataFile.name);
const metadata = JSON.parse(fs.readFileSync(metadataPath, 'utf-8'));
console.log(chalk.hex('#FFD700')('\n📊 Build Information:'));
console.log(chalk.gray(` Build Date: ${metadata.build_date}`));
console.log(chalk.gray(` Total Lists: ${metadata.total_lists}`));
console.log(chalk.gray(` Total Repos: ${metadata.total_repos}`));
console.log(chalk.gray(` Total READMEs: ${metadata.total_readmes}`));
console.log(chalk.gray(` Index Mode: ${metadata.index_mode}`));
}
} finally {
// Cleanup temp directory
fs.rmSync(tempDir, { recursive: true, force: true });
}
}
// Main function
async function manage() {
console.clear();
sectionHeader('DATABASE DOWNLOADER', '💾');
// Check prerequisites
if (!checkGhCli()) {
console.log(chalk.red('✗ GitHub CLI (gh) is not installed\n'));
console.log(chalk.gray('Install from: https://cli.github.com/\n'));
console.log(chalk.gray('Quick install:'));
console.log(chalk.gray(' • macOS: brew install gh'));
console.log(chalk.gray(' • Ubuntu: sudo apt install gh'));
console.log(chalk.gray(' • Windows: winget install GitHub.cli\n'));
return;
}
if (!checkGhAuth()) {
console.log(chalk.yellow('⚠️ Not authenticated with GitHub CLI\n'));
const { authenticate } = await inquirer.prompt([
{
type: 'confirm',
name: 'authenticate',
message: 'Authenticate now?',
default: true
}
]);
if (!authenticate) {
console.log(chalk.gray('\nCancelled'));
return;
}
try {
execSync('gh auth login', { stdio: 'inherit' });
} catch {
console.log(chalk.red('\n✗ Authentication failed'));
return;
}
console.log(chalk.green('\n✓ Authenticated successfully\n'));
}
// Get repository
let repo = getRepository();
if (!repo) {
const { inputRepo } = await inquirer.prompt([
{
type: 'input',
name: 'inputRepo',
message: purpleGold('Enter GitHub repository (owner/repo):'),
validate: (input) => {
if (!input.match(/^[^/]+\/[^/]+$/)) {
return 'Please enter in format: owner/repo';
}
return true;
}
}
]);
repo = inputRepo;
} else {
console.log(purpleGold(`Repository: ${repo}\n`));
}
// List databases
const databases = await listDatabases(repo);
if (!databases || databases.length === 0) {
console.log(chalk.yellow('\n⚠ No databases available for download'));
console.log(chalk.gray(' Database builds are created by GitHub Actions'));
console.log(chalk.gray(' Check the Actions tab in your repository\n'));
return;
}
// Show table
console.log(chalk.hex('#DA22FF')('\nAvailable Databases:\n'));
const Table = require('cli-table3');
const table = new Table({
head: [
chalk.hex('#DA22FF')('#'),
chalk.hex('#DA22FF')('Build Date'),
chalk.hex('#DA22FF')('Commit'),
chalk.hex('#DA22FF')('Size')
],
colWidths: [5, 25, 12, 12],
style: {
head: [],
border: ['gray']
}
});
databases.forEach((db, idx) => {
table.push([
chalk.gray(idx + 1),
chalk.hex('#FF69B4')(formatDate(db.createdAt)),
chalk.hex('#FFD700')(db.sha),
chalk.hex('#9733EE')(formatSize(db.artifact.size_in_bytes))
]);
});
console.log(table.toString());
// Select database
const choices = [
...databases.map((db, idx) => ({
name: `${idx + 1}. ${formatDate(db.createdAt)} (${db.sha}) - ${formatSize(db.artifact.size_in_bytes)}`,
value: idx
})),
new inquirer.Separator(),
{ name: chalk.gray('← Cancel'), value: -1 }
];
const { selection } = await inquirer.prompt([
{
type: 'list',
name: 'selection',
message: 'Select a database to download:',
choices: choices,
pageSize: 12
}
]);
if (selection === -1) {
console.log(chalk.gray('\nCancelled'));
return;
}
const selectedDb = databases[selection];
// Confirm download
const { confirm } = await inquirer.prompt([
{
type: 'confirm',
name: 'confirm',
message: `Download database from ${formatDate(selectedDb.createdAt)}?`,
default: true
}
]);
if (!confirm) {
console.log(chalk.gray('\nCancelled'));
return;
}
// Download and install
await downloadDatabase(repo, selectedDb.runId, selectedDb.artifact.name);
console.log(chalk.hex('#FFD700')('\n🎉 Ready to use!'));
console.log(chalk.gray(' Run: ./awesome\n'));
}
module.exports = {
manage
};

lib/menu.js

@@ -22,6 +22,7 @@ async function showMainMenu() {
{ name: `${chalk.hex('#DA22FF')('📖')} Reading History`, value: 'history' },
new inquirer.Separator(chalk.gray('─'.repeat(50))),
{ name: `${chalk.hex('#FF69B4')('🔧')} Build/Rebuild Index`, value: 'index' },
{ name: `${chalk.hex('#9733EE')('💾')} Download Pre-Built Database`, value: 'db' },
{ name: `${chalk.hex('#FFD700')('📊')} Statistics`, value: 'stats' },
{ name: `${chalk.hex('#DA22FF')('⚙️')} Settings`, value: 'settings' },
new inquirer.Separator(chalk.gray('─'.repeat(50))),
@@ -98,6 +99,11 @@ async function handleMenuChoice(choice) {
await settings.manage();
break;
case 'db':
const dbDownload = require('./db-download');
await dbDownload.manage();
break;
default:
console.log(chalk.yellow('Invalid choice'));
}