Major architecture overhaul to address RunPod Docker limitations

Core Infrastructure:
- Add base_service.py: Abstract base class for all AI services
- Add service_manager.py: Process lifecycle management
- Add core/requirements.txt: Core dependencies

Model Services (Standalone Python):
- Add models/vllm/server.py: Qwen 2.5 7B text generation
- Add models/flux/server.py: Flux.1 Schnell image generation
- Add models/musicgen/server.py: MusicGen Medium music generation
- Each service inherits from the GPUService base class
- OpenAI-compatible APIs
- Standalone execution support

Ansible Deployment:
- Add playbook.yml: Comprehensive deployment automation
- Add ansible.cfg: Ansible configuration
- Add inventory.yml: Localhost inventory
- Tags: base, python, dependencies, models, tailscale, validate, cleanup (sketched below)

Scripts:
- Add scripts/install.sh: Full installation wrapper
- Add scripts/download-models.sh: Model download wrapper
- Add scripts/start-all.sh: Start orchestrator
- Add scripts/stop-all.sh: Stop all services

Documentation:
- Update ARCHITECTURE.md: Document distributed VPS+GPU architecture

Benefits:
- No Docker: Avoids RunPod CAP_SYS_ADMIN limitations
- Fully reproducible via Ansible
- Extensible: Add models in 3 steps
- Direct Python execution (no container overhead)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
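The tag list above suggests playbook.yml groups its tasks into tagged phases so each phase can be re-run on its own. Below is a minimal sketch of that layout, assuming standard Ansible modules; the task names, package list, and module choices are illustrative guesses, and only the tag names, variable names, and file paths come from this commit:

---
# Hypothetical playbook.yml layout: only the tags, vars, and paths
# are taken from the commit; the tasks themselves are assumptions.
- name: Deploy AI services on RunPod
  hosts: all
  tasks:
    - name: Install base system packages
      ansible.builtin.apt:
        name: [git, curl, tmux]
        state: present
      tags: [base]

    - name: Install core Python dependencies
      ansible.builtin.pip:
        requirements: "{{ ai_dir }}/core/requirements.txt"
      tags: [python, dependencies]

    - name: Join the tailnet (skipped when no auth key is provided)
      ansible.builtin.command: "tailscale up --authkey={{ tailscale_key }}"
      when: tailscale_key | length > 0
      tags: [tailscale]

With such a layout, a single phase can be re-run in isolation, e.g. `ansible-playbook playbook.yml --tags tailscale`.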
inventory.yml · 27 lines · 690 B · YAML
---
# Ansible inventory for RunPod deployment
#
# This inventory defines localhost as the target for RunPod instances.
# All tasks run locally on the RunPod GPU server.

all:
  hosts:
    localhost:
      ansible_connection: local
      ansible_python_interpreter: /usr/bin/python3

  vars:
    # Workspace configuration
    workspace_dir: /workspace
    ai_dir: /workspace/ai

    # Environment variables (loaded from .env if present)
    hf_token: "{{ lookup('env', 'HF_TOKEN') }}"
    tailscale_key: "{{ lookup('env', 'TAILSCALE_AUTH_KEY') | default('') }}"

    # GPU configuration
    gpu_memory_utilization: 0.85

    # Model cache
    huggingface_cache: /workspace/huggingface_cache
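The vars above are presumably consumed by playbook tasks rather than read directly by the model services. As a hedged illustration (this task is not from the commit; HF_TOKEN and HF_HOME are standard Hugging Face environment variables), the token and cache path might be threaded through the model download step like this:

# Illustrative only: one way the inventory vars could be consumed.
- name: Download model weights into the shared cache
  ansible.builtin.command: "{{ ai_dir }}/scripts/download-models.sh"
  environment:
    HF_TOKEN: "{{ hf_token }}"
    HF_HOME: "{{ huggingface_cache }}"
  tags: [models]

Note that tailscale_key falls back to an empty string via default(''), so Tailscale-related tasks can be skipped conditionally (as in the playbook sketch above) when no auth key is exported.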