diff --git a/README.md b/README.md
index 5f98f3d..888b5c8 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ If you've already created a RunPod template:
 1. Deploy pod from template in RunPod dashboard
 2. SSH to the pod
 3. Create `.env` file with your credentials
-4. Start orchestrator: `docker compose -f docker-compose.gpu.yaml up -d orchestrator`
+4. Start orchestrator: `docker compose -f compose.yaml up -d orchestrator`
 
 **See**: [RUNPOD_TEMPLATE.md](RUNPOD_TEMPLATE.md) for template usage instructions.
 
@@ -107,7 +107,7 @@ models:
     endpoint: /v1/chat/completions
 ```
 
-Then add the Docker service to `docker-compose.gpu.yaml` and restart the orchestrator.
+Then add the Docker service to `compose.yaml` and restart the orchestrator.
 
 **See**: [docs/DEPLOYMENT.md](docs/DEPLOYMENT.md#adding-new-models) for complete instructions.
 
diff --git a/docker-compose.gpu.yaml b/compose.yaml
similarity index 100%
rename from docker-compose.gpu.yaml
rename to compose.yaml
diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md
index 04736da..2c86473 100644
--- a/docs/DEPLOYMENT.md
+++ b/docs/DEPLOYMENT.md
@@ -94,7 +94,7 @@ ssh gpu-pivoine
 cd /workspace/ai/
 
 # Start orchestrator (always running)
-docker compose -f docker-compose.gpu.yaml up -d orchestrator
+docker compose -f compose.yaml up -d orchestrator
 
 # Orchestrator will automatically manage model services as needed
 ```
@@ -217,7 +217,7 @@ models:
 ### Step 2: Add Docker Service
 
 ```yaml
-# Add to ai/docker-compose.gpu.yaml
+# Add to ai/compose.yaml
 services:
   vllm-llama:
     build: ./vllm
@@ -245,7 +245,7 @@ services:
 ```bash
 ssh gpu-pivoine
 cd /workspace/ai/
-docker compose -f docker-compose.gpu.yaml restart orchestrator
+docker compose -f compose.yaml restart orchestrator
 ```
 
 **That's it!** The orchestrator automatically detects the new model.
@@ -256,13 +256,13 @@ docker compose -f docker-compose.gpu.yaml restart orchestrator
 
 ```bash
 # Start orchestrator
-docker compose -f docker-compose.gpu.yaml up -d orchestrator
+docker compose -f compose.yaml up -d orchestrator
 
 # View orchestrator logs
 docker logs -f ai_orchestrator
 
 # Restart orchestrator
-docker compose -f docker-compose.gpu.yaml restart orchestrator
+docker compose -f compose.yaml restart orchestrator
 
 # Check active model
 curl http://100.100.108.13:9000/health
@@ -287,10 +287,10 @@ curl http://100.100.108.13:9000/health | jq '.current_model'
 
 ```bash
 # Manually start a specific model (bypassing orchestrator)
-docker compose -f docker-compose.gpu.yaml --profile text up -d vllm-qwen
+docker compose -f compose.yaml --profile text up -d vllm-qwen
 
 # Stop a model
-docker compose -f docker-compose.gpu.yaml stop vllm-qwen
+docker compose -f compose.yaml stop vllm-qwen
 
 # View model logs
 docker logs -f ai_vllm-qwen_1
@@ -337,10 +337,10 @@ docker logs -f ai_musicgen_1
 docker logs ai_orchestrator
 
 # Check if model service exists
-docker compose -f docker-compose.gpu.yaml config | grep -A 10 "vllm-qwen"
+docker compose -f compose.yaml config | grep -A 10 "vllm-qwen"
 
 # Manually test model service
-docker compose -f docker-compose.gpu.yaml --profile text up -d vllm-qwen
+docker compose -f compose.yaml --profile text up -d vllm-qwen
 curl http://localhost:8001/health
 ```
 
@@ -354,8 +354,8 @@ ls -l /var/run/docker.sock
 sudo systemctl restart docker
 
 # Rebuild orchestrator
-docker compose -f docker-compose.gpu.yaml build orchestrator
-docker compose -f docker-compose.gpu.yaml up -d orchestrator
+docker compose -f compose.yaml build orchestrator
+docker compose -f compose.yaml up -d orchestrator
 ```
 
 ### Model Switching Too Slow
@@ -376,7 +376,7 @@ docker run --rm -it --gpus all \
 
 ```
 ai/
-├── docker-compose.gpu.yaml        # Main orchestration file
+├── compose.yaml                   # Main orchestration file
 ├── .env.example                   # Environment template
 ├── README.md                      # This file
 │
diff --git a/docs/GPU_DEPLOYMENT_LOG.md b/docs/GPU_DEPLOYMENT_LOG.md
index 206097b..34f30cd 100644
--- a/docs/GPU_DEPLOYMENT_LOG.md
+++ b/docs/GPU_DEPLOYMENT_LOG.md
@@ -260,7 +260,7 @@ models:
 ### Deployment Changes
 
 #### Docker Compose Structure
-- **File**: `docker-compose.gpu.yaml`
+- **File**: `compose.yaml`
 - **Services**: 4 total (1 orchestrator + 3 models)
 - **Profiles**: `text`, `image`, `audio` (orchestrator manages activation)
 - **Restart Policy**: `no` for models (orchestrator controls lifecycle)
@@ -319,7 +319,7 @@ ssh gpu-pivoine
 cd /workspace/ai/
 
 # Start orchestrator (manages everything)
-docker compose -f docker-compose.gpu.yaml up -d orchestrator
+docker compose -f compose.yaml up -d orchestrator
 
 # Check status
 curl http://100.100.108.13:9000/health
diff --git a/docs/RUNPOD_TEMPLATE.md b/docs/RUNPOD_TEMPLATE.md
index 99b712b..29621bc 100644
--- a/docs/RUNPOD_TEMPLATE.md
+++ b/docs/RUNPOD_TEMPLATE.md
@@ -116,14 +116,14 @@ ls -lh /workspace/musicgen/models/
 
 # Test orchestrator starts
 cd /workspace/ai
-docker compose -f docker-compose.gpu.yaml up -d orchestrator
+docker compose -f compose.yaml up -d orchestrator
 docker logs ai_orchestrator
 
 # Test model loading (should be fast since models are cached)
 curl http://localhost:9000/health
 
 # Stop orchestrator
-docker compose -f docker-compose.gpu.yaml down
+docker compose -f compose.yaml down
 ```
 
 ### Step 4: Clean Up Before Saving
@@ -215,7 +215,7 @@ EOF
 tailscale up --authkey=
 
 # Start orchestrator (models already cached, starts in seconds!)
-docker compose -f docker-compose.gpu.yaml up -d orchestrator
+docker compose -f compose.yaml up -d orchestrator
 
 # Verify
 curl http://localhost:9000/health
@@ -295,12 +295,12 @@ docker compose --profile audio up -d musicgen
 
 ```bash
 # Build images one at a time
-docker compose -f docker-compose.gpu.yaml build orchestrator
-docker compose -f docker-compose.gpu.yaml build vllm-qwen
-docker compose -f docker-compose.gpu.yaml build musicgen
+docker compose -f compose.yaml build orchestrator
+docker compose -f compose.yaml build vllm-qwen
+docker compose -f compose.yaml build musicgen
 
 # Check build logs for errors
-docker compose -f docker-compose.gpu.yaml build --no-cache --progress=plain orchestrator
+docker compose -f compose.yaml build --no-cache --progress=plain orchestrator
 ```
 
 ### Tailscale Won't Install
diff --git a/scripts/prepare-template.sh b/scripts/prepare-template.sh
index c66676c..e803b0b 100644
--- a/scripts/prepare-template.sh
+++ b/scripts/prepare-template.sh
@@ -118,15 +118,15 @@ build_docker_images() {
 
     # Build orchestrator
     log_info "Building orchestrator..."
-    docker compose -f docker-compose.gpu.yaml build orchestrator
+    docker compose -f compose.yaml build orchestrator
 
     # Build vLLM
     log_info "Building vLLM..."
-    docker compose -f docker-compose.gpu.yaml build vllm-qwen
+    docker compose -f compose.yaml build vllm-qwen
 
     # Build MusicGen
     log_info "Building MusicGen..."
-    docker compose -f docker-compose.gpu.yaml build musicgen
+    docker compose -f compose.yaml build musicgen
 
     # Pull Flux image (pre-built)
     log_info "Pulling Flux.1 image..."
@@ -196,7 +196,7 @@ validate_installation() {
 
     # Start orchestrator
     log_info "Starting orchestrator for validation..."
-    docker compose -f docker-compose.gpu.yaml up -d orchestrator
+    docker compose -f compose.yaml up -d orchestrator
 
     sleep 10
 
@@ -217,7 +217,7 @@ validate_installation() {
     fi
 
     # Stop orchestrator
-    docker compose -f docker-compose.gpu.yaml down
+    docker compose -f compose.yaml down
 
     log_success "Validation passed"
 }