Files
bin/artifact_postgres_export.sh

295 lines
8.3 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
#
# artifact_postgres_export.sh - Export code artifacts from Open WebUI PostgreSQL database
#
# Usage: artifact_postgres_export.sh [OPTIONS] <chat_id> <output_dir>
#
# Arguments:
# chat_id Chat ID from Open WebUI URL (e.g., e135d74e-5b43-4b24-a651-e999f103942b)
# output_dir Directory to save extracted code files
#
# Options:
# -h, --help Show this help message
# -H, --host HOST PostgreSQL host (default: ai_postgres via Docker)
# -u, --user USER PostgreSQL user (default: ai)
# -d, --db DATABASE PostgreSQL database (default: openwebui)
# -v, --verbose Verbose output
# --remote HOST SSH remote host for Docker access (default: vps)
# -f, --force Force export even if output directory is not empty
#
# Examples:
# artifact_postgres_export.sh e135d74e-5b43-4b24-a651-e999f103942b ~/Projects/rust/piglet
# artifact_postgres_export.sh --remote vps abc123def456 ./output
#
set -euo pipefail
# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
RESET='\033[0m'
# Default values
PG_CONTAINER="ai_postgres"
PG_USER="ai"
PG_DATABASE="openwebui"
REMOTE_HOST="vps"
VERBOSE=false
FORCE=false
# Functions
print_error() {
echo -e "${RED}✗ Error:${RESET} $1" >&2
}
print_success() {
echo -e "${GREEN}${RESET} $1"
}
print_info() {
echo -e "${BLUE}${RESET} $1"
}
print_warning() {
echo -e "${YELLOW}${RESET} $1"
}
print_verbose() {
if [[ "$VERBOSE" == "true" ]]; then
echo -e "${CYAN}${RESET} $1"
fi
}
show_help() {
sed -n '2,/^$/p' "$0" | sed 's/^# \?//' | head -n -1
exit 0
}
# Parse options
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
show_help
;;
-H|--host)
PG_CONTAINER="$2"
shift 2
;;
-u|--user)
PG_USER="$2"
shift 2
;;
-d|--db)
PG_DATABASE="$2"
shift 2
;;
-v|--verbose)
VERBOSE=true
shift
;;
-f|--force)
FORCE=true
shift
;;
--remote)
REMOTE_HOST="$2"
shift 2
;;
-*)
print_error "Unknown option: $1"
echo "Use --help for usage information"
exit 1
;;
*)
POSITIONAL_ARGS+=("$1")
shift
;;
esac
done
# Restore positional parameters
set -- "${POSITIONAL_ARGS[@]}"
# Validate arguments
if [[ $# -lt 1 ]]; then
print_error "Missing required argument: chat_id"
echo "Usage: $(basename "$0") [OPTIONS] <chat_id> [output_dir]"
echo "Use --help for more information"
exit 1
fi
CHAT_ID="$1"
OUTPUT_DIR="${2:-.}" # Default to current directory if not specified
# Validate chat ID format (UUID)
if [[ ! "$CHAT_ID" =~ ^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$ ]]; then
print_error "Invalid chat ID format. Expected UUID format."
exit 1
fi
# Create output directory
mkdir -p "$OUTPUT_DIR"
# Check if output directory is empty (unless --force)
if [[ "$FORCE" == "false" ]]; then
if [[ -n "$(ls -A "$OUTPUT_DIR" 2>/dev/null)" ]]; then
print_error "Output directory is not empty: $OUTPUT_DIR"
echo "Use --force to export anyway, or choose an empty directory"
exit 1
fi
fi
print_verbose "Output directory: $OUTPUT_DIR"
# Function to run PostgreSQL queries
run_psql() {
local query="$1"
if [[ -n "$REMOTE_HOST" ]]; then
ssh -A "$REMOTE_HOST" "docker exec $PG_CONTAINER psql -U $PG_USER -d $PG_DATABASE -t -c \"$query\""
else
docker exec "$PG_CONTAINER" psql -U "$PG_USER" -d "$PG_DATABASE" -t -c "$query"
fi
}
print_info "Fetching chat from database..."
# Check if chat exists
CHAT_EXISTS=$(run_psql "SELECT COUNT(*) FROM chat WHERE id = '$CHAT_ID';" | tr -d '[:space:]')
if [[ "$CHAT_EXISTS" != "1" ]]; then
print_error "Chat ID not found in database"
exit 1
fi
# Get chat title
CHAT_TITLE=$(run_psql "SELECT title FROM chat WHERE id = '$CHAT_ID';" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
print_success "Found chat: ${MAGENTA}${CHAT_TITLE}${RESET}"
# Export chat data to temporary file
TEMP_FILE=$(mktemp)
trap 'rm -f "$TEMP_FILE"' EXIT
print_verbose "Exporting chat data to temporary file..."
run_psql "SELECT chat FROM chat WHERE id = '$CHAT_ID';" > "$TEMP_FILE"
# Parse JSON and extract code blocks using Python
print_info "Extracting code blocks..."
python3 - "$TEMP_FILE" "$OUTPUT_DIR" "$CHAT_ID" "$CHAT_TITLE" "$GREEN" "$RED" "$YELLOW" "$BLUE" "$MAGENTA" "$CYAN" "$RESET" <<'PYTHON_SCRIPT'
import json
import re
import sys
from pathlib import Path
# Get arguments
temp_file = sys.argv[1]
output_dir = Path(sys.argv[2])
chat_id = sys.argv[3]
chat_title = sys.argv[4]
GREEN, RED, YELLOW, BLUE, MAGENTA, CYAN, RESET = sys.argv[5:12]
# Read JSON
with open(temp_file, 'r') as f:
content = f.read().strip()
try:
chat_data = json.loads(content)
except json.JSONDecodeError as e:
print(f"{RED}✗ Error:{RESET} Failed to parse JSON: {e}", file=sys.stderr)
sys.exit(1)
messages = chat_data.get('messages', [])
if isinstance(messages, dict):
messages = list(messages.values())
if not messages:
print(f"{YELLOW}⚠ Warning:{RESET} No messages found in chat")
sys.exit(0)
code_blocks_found = 0
# Extension mapping
extensions = {
'rust': '.rs', 'python': '.py', 'javascript': '.js', 'typescript': '.ts',
'bash': '.sh', 'shell': '.sh', 'sh': '.sh', 'toml': '.toml',
'yaml': '.yaml', 'yml': '.yml', 'json': '.json', 'md': '.md',
'markdown': '.md', 'c': '.c', 'cpp': '.cpp', 'java': '.java',
'go': '.go', 'html': '.html', 'css': '.css',
}
for idx, message in enumerate(messages):
role = message.get('role', 'unknown')
content = message.get('content', '')
# Split content by code blocks
parts = re.split(r'```', content)
block_idx = 0
for i in range(1, len(parts), 2):
if i >= len(parts):
break
# Get the code block
code_block = parts[i]
lines = code_block.split('\n', 1)
language = lines[0].strip().lower() if lines else 'txt'
code = lines[1] if len(lines) > 1 else code_block
# Look for filename in the text before this code block
before_block = parts[i - 1]
before_lines = before_block.strip().split('\n')
filename = None
original_filename = None
# Check last few lines before code block for filenames
for line in reversed(before_lines[-3:]):
line = line.strip()
# Pattern 1: ### `src/main.rs` or ### src/main.rs
match = re.search(r'###?\s*`?([^\s`]+\.(rs|toml|py|js|sh|md|json|yaml|yml|html|css|c|cpp|java|go))`?', line, re.IGNORECASE)
if match:
original_filename = match.group(1)
filename = original_filename # Keep original path structure
break
# Pattern 2: Look for step indicators with filenames
match = re.search(r'[:#]\s*`?([^\s`]+\.(rs|toml|py|js|sh|md|json|yaml|yml|html|css|c|cpp|java|go))`?', line, re.IGNORECASE)
if match:
original_filename = match.group(1)
filename = original_filename # Keep original path structure
break
# If no filename found, generate one
if not filename:
ext = extensions.get(language, '.txt')
filename = f"{idx:03d}-{block_idx:02d}{ext}"
# Strip leading slashes/dots to prevent absolute path issues
filename = filename.lstrip('/')
filepath = output_dir / filename
# Create parent directories if needed
filepath.parent.mkdir(parents=True, exist_ok=True)
# Save code
with open(filepath, 'w') as f:
f.write(code.strip())
if original_filename:
print(f"{GREEN}✓{RESET} Saved: {filename} ({CYAN}{language}{RESET})")
else:
print(f"{GREEN}✓{RESET} Saved: {filename} ({CYAN}{language}{RESET})")
code_blocks_found += 1
block_idx += 1
if code_blocks_found == 0:
print(f"{YELLOW}⚠ Warning:{RESET} No code blocks found in chat")
else:
print(f"\n{GREEN}✓{RESET} Exported {MAGENTA}{code_blocks_found}{RESET} code blocks to {BLUE}{output_dir}{RESET}")
PYTHON_SCRIPT
exit 0