bin/artifact_postgres_export.sh

#!/bin/bash
#
# artifact_postgres_export.sh - Export code artifacts from Open WebUI PostgreSQL database
#
# Usage: artifact_postgres_export.sh [OPTIONS] <chat_id> <output_dir>
#
# Arguments:
#   chat_id      Chat ID from Open WebUI URL (e.g., e135d74e-5b43-4b24-a651-e999f103942b)
#   output_dir   Directory to save extracted code files
#
# Options:
#   -h, --help           Show this help message
#   -H, --host HOST      PostgreSQL host (default: ai_postgres via Docker)
#   -u, --user USER      PostgreSQL user (default: ai)
#   -d, --db DATABASE    PostgreSQL database (default: openwebui)
#   -v, --verbose        Verbose output
#   --remote HOST        SSH remote host for Docker access (default: vps)
#   -f, --force          Force export even if output directory is not empty
#
# Examples:
#   artifact_postgres_export.sh e135d74e-5b43-4b24-a651-e999f103942b ~/Projects/rust/piglet
#   artifact_postgres_export.sh --remote vps abc123def456 ./output
#

set -euo pipefail

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
RESET='\033[0m'

# Default values
PG_CONTAINER="ai_postgres"
PG_USER="ai"
PG_DATABASE="openwebui"
REMOTE_HOST="vps"
VERBOSE=false
FORCE=false

# Functions
print_error() {
    echo -e "${RED}✗ Error:${RESET} $1" >&2
}

print_success() {
    echo -e "${GREEN}✓${RESET} $1"
}

print_info() {
    echo -e "${BLUE}ℹ${RESET} $1"
}

print_warning() {
    echo -e "${YELLOW}⚠${RESET} $1"
}

print_verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        echo -e "${CYAN}→${RESET} $1"
    fi
}

show_help() {
    sed -n '2,/^$/p' "$0" | sed 's/^# \?//' | head -n -1
    exit 0
}

# Parse options
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
    case $1 in
        -h|--help)
            show_help
            ;;
        -H|--host)
            PG_CONTAINER="$2"
            shift 2
            ;;
        -u|--user)
            PG_USER="$2"
            shift 2
            ;;
        -d|--db)
            PG_DATABASE="$2"
            shift 2
            ;;
        -v|--verbose)
            VERBOSE=true
            shift
            ;;
        -f|--force)
            FORCE=true
            shift
            ;;
        --remote)
            REMOTE_HOST="$2"
            shift 2
            ;;
        -*)
            print_error "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
        *)
            POSITIONAL_ARGS+=("$1")
            shift
            ;;
    esac
done

# Restore positional parameters
set -- "${POSITIONAL_ARGS[@]}"

# Validate arguments
if [[ $# -lt 1 ]]; then
    print_error "Missing required argument: chat_id"
    echo "Usage: $(basename "$0") [OPTIONS] <chat_id> [output_dir]"
    echo "Use --help for more information"
    exit 1
fi

CHAT_ID="$1"
OUTPUT_DIR="${2:-.}"  # Default to current directory if not specified

# Validate chat ID format (UUID)
if [[ ! "$CHAT_ID" =~ ^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$ ]]; then
    print_error "Invalid chat ID format. Expected UUID format."
    exit 1
fi

# Create output directory
mkdir -p "$OUTPUT_DIR"

# Check if output directory is empty (unless --force)
if [[ "$FORCE" == "false" ]]; then
    if [[ -n "$(ls -A "$OUTPUT_DIR" 2>/dev/null)" ]]; then
        print_error "Output directory is not empty: $OUTPUT_DIR"
        echo "Use --force to export anyway, or choose an empty directory"
        exit 1
    fi
fi

print_verbose "Output directory: $OUTPUT_DIR"

# Function to run PostgreSQL queries
run_psql() {
    local query="$1"
    if [[ -n "$REMOTE_HOST" ]]; then
        ssh -A "$REMOTE_HOST" "docker exec $PG_CONTAINER psql -U $PG_USER -d $PG_DATABASE -t -c \"$query\""
    else
        docker exec "$PG_CONTAINER" psql -U "$PG_USER" -d "$PG_DATABASE" -t -c "$query"
    fi
}

print_info "Fetching chat from database..."

# Check if chat exists
CHAT_EXISTS=$(run_psql "SELECT COUNT(*) FROM chat WHERE id = '$CHAT_ID';" | tr -d '[:space:]')
if [[ "$CHAT_EXISTS" != "1" ]]; then
    print_error "Chat ID not found in database"
    exit 1
fi

# Get chat title
CHAT_TITLE=$(run_psql "SELECT title FROM chat WHERE id = '$CHAT_ID';" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
print_success "Found chat: ${MAGENTA}${CHAT_TITLE}${RESET}"

# Export chat data to temporary file
TEMP_FILE=$(mktemp)
trap 'rm -f "$TEMP_FILE"' EXIT

print_verbose "Exporting chat data to temporary file..."
run_psql "SELECT chat FROM chat WHERE id = '$CHAT_ID';" > "$TEMP_FILE"

# Parse JSON and extract code blocks using Python
print_info "Extracting code blocks..."

python3 - "$TEMP_FILE" "$OUTPUT_DIR" "$CHAT_ID" "$CHAT_TITLE" "$GREEN" "$RED" "$YELLOW" "$BLUE" "$MAGENTA" "$CYAN" "$RESET" <<'PYTHON_SCRIPT'
import json
import re
import sys
from pathlib import Path

# Get arguments
temp_file = sys.argv[1]
output_dir = Path(sys.argv[2])
chat_id = sys.argv[3]
chat_title = sys.argv[4]
GREEN, RED, YELLOW, BLUE, MAGENTA, CYAN, RESET = sys.argv[5:12]

# Read JSON
with open(temp_file, 'r') as f:
    content = f.read().strip()

try:
    chat_data = json.loads(content)
except json.JSONDecodeError as e:
    print(f"{RED}✗ Error:{RESET} Failed to parse JSON: {e}", file=sys.stderr)
    sys.exit(1)

messages = chat_data.get('messages', [])
if isinstance(messages, dict):
    messages = list(messages.values())
if not messages:
    print(f"{YELLOW}⚠ Warning:{RESET} No messages found in chat")
    sys.exit(0)

code_blocks_found = 0

# Extension mapping
extensions = {
    'rust': '.rs', 'python': '.py', 'javascript': '.js', 'typescript': '.ts',
    'bash': '.sh', 'shell': '.sh', 'sh': '.sh', 'toml': '.toml',
    'yaml': '.yaml', 'yml': '.yml', 'json': '.json', 'md': '.md',
    'markdown': '.md', 'c': '.c', 'cpp': '.cpp', 'java': '.java',
    'go': '.go', 'html': '.html', 'css': '.css',
}

for idx, message in enumerate(messages):
    role = message.get('role', 'unknown')
    content = message.get('content', '')

    # Split content by code blocks
    parts = re.split(r'```', content)

    block_idx = 0
    for i in range(1, len(parts), 2):
        if i >= len(parts):
            break

        # Get the code block
        code_block = parts[i]
        lines = code_block.split('\n', 1)
        language = lines[0].strip().lower() if lines else 'txt'
        code = lines[1] if len(lines) > 1 else code_block

        # Look for filename in the text before this code block
        before_block = parts[i - 1]
        before_lines = before_block.strip().split('\n')

        filename = None
        original_filename = None

        # Check last few lines before code block for filenames
        for line in reversed(before_lines[-3:]):
            line = line.strip()
            # Pattern 1: ### `src/main.rs` or ### src/main.rs
            match = re.search(r'###?\s*`?([^\s`]+\.(rs|toml|py|js|sh|md|json|yaml|yml|html|css|c|cpp|java|go))`?', line, re.IGNORECASE)
            if match:
                original_filename = match.group(1)
                filename = original_filename  # Keep original path structure
                break
            # Pattern 2: Look for step indicators with filenames
            match = re.search(r'[:#]\s*`?([^\s`]+\.(rs|toml|py|js|sh|md|json|yaml|yml|html|css|c|cpp|java|go))`?', line, re.IGNORECASE)
            if match:
                original_filename = match.group(1)
                filename = original_filename  # Keep original path structure
                break

        # If no filename found, generate one
        if not filename:
            ext = extensions.get(language, '.txt')
            filename = f"{idx:03d}-{block_idx:02d}{ext}"

        # Strip leading slashes/dots to prevent absolute path issues
        filename = filename.lstrip('/')

        filepath = output_dir / filename

        # Create parent directories if needed
        filepath.parent.mkdir(parents=True, exist_ok=True)

        # Save code
        with open(filepath, 'w') as f:
            f.write(code.strip())

        if original_filename:
            print(f"{GREEN}✓{RESET} Saved: {filename} ({CYAN}{language}{RESET})")
        else:
            print(f"{GREEN}✓{RESET} Saved: {filename} ({CYAN}{language}{RESET})")
        code_blocks_found += 1
        block_idx += 1

if code_blocks_found == 0:
    print(f"{YELLOW}⚠ Warning:{RESET} No code blocks found in chat")
else:
    print(f"\n{GREEN}✓{RESET} Exported {MAGENTA}{code_blocks_found}{RESET} code blocks to {BLUE}{output_dir}{RESET}")
PYTHON_SCRIPT

exit 0