From e6644ada6194cb7b6e88dc25d5ce386d979d03ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?=
Date: Wed, 26 Nov 2025 15:31:41 +0100
Subject: [PATCH] Initial commit

---
 .gitea/workflows/build-docker-image.yml | 114 ++++++++++
 .gitignore                              |  24 +++
 Dockerfile                              |  48 +++++
 arty.yml                                | 253 ++++++++++++++++++++++
 models/models_civitai.yaml              | 134 ++++++++++++
 models/models_huggingface.yaml          | 137 ++++++++++++
 services/vllm/config_bge.yaml           |   4 +
 services/vllm/config_llama.yaml         |   4 +
 services/vllm/requirements.txt          |   1 +
 services/webdav-sync/requirements.txt   |   2 +
 services/webdav-sync/webdav_sync.py     | 267 ++++++++++++++++++++++++
 start.sh                                |  56 +++++
 supervisord.conf                        | 120 +++++++++++
 13 files changed, 1164 insertions(+)
 create mode 100644 .gitea/workflows/build-docker-image.yml
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 arty.yml
 create mode 100644 models/models_civitai.yaml
 create mode 100644 models/models_huggingface.yaml
 create mode 100644 services/vllm/config_bge.yaml
 create mode 100644 services/vllm/config_llama.yaml
 create mode 100644 services/vllm/requirements.txt
 create mode 100644 services/webdav-sync/requirements.txt
 create mode 100644 services/webdav-sync/webdav_sync.py
 create mode 100644 start.sh
 create mode 100644 supervisord.conf

diff --git a/.gitea/workflows/build-docker-image.yml b/.gitea/workflows/build-docker-image.yml
new file mode 100644
index 0000000..1edf900
--- /dev/null
+++ b/.gitea/workflows/build-docker-image.yml
@@ -0,0 +1,114 @@
+name: Build and Push RunPod Docker Image
+
+on:
+  push:
+    branches:
+      - main
+    tags:
+      - "v*.*.*"
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Custom tag for the image"
+        required: false
+        default: "manual"
+
+env:
+  REGISTRY: dev.pivoine.art
+  IMAGE_NAME: valknar/runpod-ai-orchestrator
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          platforms: linux/amd64
+
+      - name: Log in to Gitea Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ gitea.actor }}
+          password: ${{ secrets.REGISTRY_TOKEN }}
+
+      - name: Extract metadata (tags, labels)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            # Tag as 'latest' for main branch
+            type=raw,value=latest,enable={{is_default_branch}}
+            # Tag with branch name
+            type=ref,event=branch
+            # Tag with PR number
+            type=ref,event=pr
+            # Tag with git tag (semver)
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=semver,pattern={{major}}
+            # Tag with <branch>-<sha>; branch refs only, {{branch}} is empty for tags/PRs and would yield "-<sha>"
+            type=sha,prefix={{branch}}-,enable=${{ startsWith(gitea.ref, 'refs/heads/') }}
+            # Custom tag from workflow_dispatch
+            type=raw,value=${{ gitea.event.inputs.tag }},enable=${{ gitea.event_name == 'workflow_dispatch' }}
+          labels: |
+            org.opencontainers.image.title=RunPod AI Orchestrator
+            org.opencontainers.image.description=Minimal Docker template for RunPod deployment with ComfyUI + vLLM orchestration, Supervisor process management, and Tailscale VPN integration
+            org.opencontainers.image.vendor=valknar
+            org.opencontainers.image.source=https://dev.pivoine.art/${{ gitea.repository }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64
+          push: ${{ gitea.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
+          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache,mode=max
+
+      - name: Generate image digest
+        if: gitea.event_name != 'pull_request'
+        run: |
+          echo "### Docker Image Published :rocket:" >> $GITEA_STEP_SUMMARY
+          echo "" >> $GITEA_STEP_SUMMARY
+          echo "**Registry:** \`${{ env.REGISTRY }}\`" >> $GITEA_STEP_SUMMARY
+          echo "**Image:** \`${{ env.IMAGE_NAME }}\`" >> $GITEA_STEP_SUMMARY
+          echo "" >> $GITEA_STEP_SUMMARY
+          echo "**Tags:**" >> $GITEA_STEP_SUMMARY
+          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
+          echo "${{ steps.meta.outputs.tags }}" >> $GITEA_STEP_SUMMARY
+          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
+          echo "" >> $GITEA_STEP_SUMMARY
+          echo "**Pull command:**" >> $GITEA_STEP_SUMMARY
+          echo "\`\`\`bash" >> $GITEA_STEP_SUMMARY
+          echo "docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> $GITEA_STEP_SUMMARY
+          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
+          echo "" >> $GITEA_STEP_SUMMARY
+          echo "**Use in RunPod template:**" >> $GITEA_STEP_SUMMARY
+          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
+          echo "Container Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> $GITEA_STEP_SUMMARY
+          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
+
+      - name: PR Comment - Image built but not pushed
+        if: gitea.event_name == 'pull_request'
+        run: |
+          echo "### Docker Image Built Successfully :white_check_mark:" >> $GITEA_STEP_SUMMARY
+          echo "" >> $GITEA_STEP_SUMMARY
+          echo "Image was built successfully but **not pushed** (PR builds are not published)." >> $GITEA_STEP_SUMMARY
+          echo "" >> $GITEA_STEP_SUMMARY
+          echo "**Would be tagged as:**" >> $GITEA_STEP_SUMMARY
+          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
+          echo "${{ steps.meta.outputs.tags }}" >> $GITEA_STEP_SUMMARY
+          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7fff4b8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Environment variables
+.env
+
+# Virtual environments
+venv/
+
+# Logs
+.logs/
+
+# Supervisord runtime files
+supervisord.pid
+supervisor.sock
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Model cache
+.cache/
+
+# Temporary files
+*.tmp
+tmp/
+temp/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..8ffff37
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,48 @@
+# RunPod AI Orchestrator Template
+# Minimal Docker image for ComfyUI + vLLM orchestration
+# Models and application code live on network volume at /workspace
+
+FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04
+
+# Install additional system utilities
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    python3-dev \
+    python3-pip \
+    python3-venv \
+    git \
+    curl \
+    wget \
+    htop \
+    tmux \
+    net-tools \
+    ffmpeg \
+    libavcodec-dev \
+    libavformat-dev \
+    libavutil-dev \
+    libswscale-dev \
+    bc \
+    jq \
+    openssh-server \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install Supervisor for process management
+RUN pip install --upgrade pip && pip install --no-cache-dir supervisor
+
+# Install Tailscale for VPN connectivity
+RUN curl -fsSL https://tailscale.com/install.sh | sh
+
+# Install yq
+RUN wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq && \
+    chmod +x /usr/local/bin/yq
+
+# Copy the startup script
+COPY start.sh /start.sh
+RUN chmod +x /start.sh
+
+# Set working directory to /workspace (network volume mount point)
+WORKDIR /workspace
+
+# RunPod calls /start.sh by default
+CMD ["/start.sh"]
diff --git a/arty.yml b/arty.yml
new file mode 100644
index 0000000..aa200d7
--- /dev/null
+++ b/arty.yml
@@ -0,0 +1,253 @@
+name: "RunPod AI Service Orchestrator"
+version: "0.1.0"
+description: "Process-based AI service orchestrator for RunPod GPU instances with ComfyUI integration"
+author: "valknar@pivoine.art"
+license: "MIT"
+
+# Environment profiles for selective repository management
+envs:
+  # RunPod environment variables
+  default:
+    COMFYUI_ROOT: services/comfyui
+    AUDIOCRAFT_ROOT: services/audiocraft
+
+# Git repositories to clone for a fresh RunPod deployment
+references:
+  # AudioCraft Studio
+  - url: ssh://git@dev.pivoine.art:2222/valknar/audiocraft-ui.git
+    into: $AUDIOCRAFT_ROOT
+  # ComfyUI base installation
+  - url: https://github.com/comfyanonymous/ComfyUI.git
+    into: $COMFYUI_ROOT
+    description: "ComfyUI - Node-based interface for image/video/audio generation"
+
+  # ComfyUI Essential Custom Nodes
+  - url: https://github.com/ltdrdata/ComfyUI-Manager.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-Manager
+    description: "ComfyUI Manager - Install/manage custom nodes and models"
+
+  - url: https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-VideoHelperSuite
+    description: "Video operations and processing"
+
+  - url: https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-AnimateDiff-Evolved
+    description: "AnimateDiff for video generation"
+
+  - url: https://github.com/cubiq/ComfyUI_IPAdapter_plus.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI_IPAdapter_plus
+    description: "IP-Adapter for style transfer"
+
+  - url: https://github.com/ltdrdata/ComfyUI-Impact-Pack.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-Impact-Pack
+    description: "Auto face enhancement and detailer"
+
+  # ComfyUI Optional Custom Nodes
+  - url: https://github.com/kijai/ComfyUI-CogVideoXWrapper.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-CogVideoXWrapper
+    description: "CogVideoX integration for text-to-video"
+
+  - url: https://github.com/ltdrdata/ComfyUI-Inspire-Pack.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-Inspire-Pack
+    description: "Additional inspiration tools"
+
+  - url: https://github.com/Kosinkadink/ComfyUI-Advanced-ControlNet.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-Advanced-ControlNet
+    description: "Advanced ControlNet features"
+
+  - url: https://github.com/MrForExample/ComfyUI-3D-Pack.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-3D-Pack
+    description: "3D asset generation"
+
+  - url: https://github.com/MixLabPro/comfyui-sound-lab.git
+    into: $COMFYUI_ROOT/custom_nodes/comfyui-sound-lab
+    description: "MusicGen and Stable Audio integration"
+
+  - url: https://github.com/billwuhao/ComfyUI_DiffRhythm.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI_DiffRhythm
+    description: "DiffRhythm - Full-length song generation (up to 4m45s) with text/audio conditioning"
+
+  - url: https://github.com/billwuhao/ComfyUI_ACE-Step.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI_ACE-Step
+    description: "ACE Step - State-of-the-art music generation with 19-language support, voice cloning, and superior coherence"
+
+  - url: https://github.com/ssitu/ComfyUI_UltimateSDUpscale.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI_UltimateSDUpscale
+    description: "Ultimate SD Upscale for high-quality image upscaling"
+
+  - url: https://github.com/kijai/ComfyUI-KJNodes.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-KJNodes
+    description: "Kijai optimizations for HunyuanVideo and Wan2.2 (FP8 scaling, video helpers, model loading)"
+
+  - url: https://github.com/Fannovel16/comfyui_controlnet_aux.git
+    into: $COMFYUI_ROOT/custom_nodes/comfyui_controlnet_aux
+    description: "ControlNet preprocessors (Canny, Depth, OpenPose, MLSD) for Wan2.2 Fun Control"
+
+  - url: https://github.com/city96/ComfyUI-GGUF.git
+    into: $COMFYUI_ROOT/custom_nodes/ComfyUI-GGUF
+    description: "GGUF quantization support for memory-efficient model loading"
+
+  - url: https://github.com/11cafe/comfyui-workspace-manager.git
+    into: $COMFYUI_ROOT/custom_nodes/comfyui-workspace-manager
+    description: "Workspace manager for ComfyUI - workflow/model organization"
+
+  - url: ssh://git@dev.pivoine.art:2222/valknar/comfyui-extras.git
+    into: $COMFYUI_ROOT/custom_nodes/comfyui-extras
+
+# Deployment scripts for RunPod instances
+scripts:
+  setup: |
+    arty deps
+    arty setup/tailscale
+    arty setup/services
+    arty setup/comfyui
+    arty models/download
+    arty models/link
+    arty supervisor/start
+
+  setup/tailscale: |
+    echo "========================================="
+    echo " Setting up tailscale"
+    echo "========================================="
+    echo ""
+
+    if [ -n "${TAILSCALE_AUTHKEY:-}" ]; then
+      echo " Starting Tailscale daemon..."
+      tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
+      sleep 3
+
+      echo " Connecting to Tailscale network..."
+      HOSTNAME="runpod-ai-orchestrator"
+      tailscale up --authkey="$TAILSCALE_AUTHKEY" --advertise-tags=tag:gpu --hostname="$HOSTNAME" || {
+        echo " ⚠ Tailscale connection failed, continuing without VPN"
+      }
+
+      # Get Tailscale IP if connected
+      TAILSCALE_IP=$(tailscale ip -4 2>/dev/null || echo "not connected")
+      if [ "$TAILSCALE_IP" != "not connected" ]; then
+        echo " ✓ Tailscale connected"
+        echo " Hostname: $HOSTNAME"
+        echo " IP: $TAILSCALE_IP"
+
+        # Export for other services
+        export GPU_TAILSCALE_IP="$TAILSCALE_IP"
+      else
+        echo " ⚠ Tailscale failed to obtain IP"
+      fi
+    else
+      echo " ⚠ Tailscale disabled (no TAILSCALE_AUTHKEY in env)"
+      echo " Services requiring VPN connectivity will not work"
+    fi
+
+  setup/services: |
+    echo "========================================="
+    echo " Setting up services python venvs"
+    echo "========================================="
+    echo ""
+
+    # Create a venv for each service directory and install its requirements
+    echo "Setting up venvs for all services..."
+    cd services
+
+    installed_count=0
+    skipped_count=0
+
+    for dir in */; do
+      cd "$dir" && python3 -m venv venv && source venv/bin/activate
+      if [ -f "requirements.txt" ]; then  # cwd is already $dir, so no ${dir} prefix
+        echo ""
+        echo "📦 Installing ${dir%/} dependencies..."
+        if pip install -r "requirements.txt"; then
+          installed_count=$((installed_count + 1))
+          echo " ✓ ${dir%/} dependencies installed"
+        else
+          echo " ⚠ Warning: Some dependencies for ${dir%/} may have failed"
+          installed_count=$((installed_count + 1))
+        fi
+      else
+        skipped_count=$((skipped_count + 1))
+      fi
+      deactivate && cd ..
+    done
+
+    echo ""
+    echo "✓ Service environments configured successfully"
+    echo " Services with dependencies: $installed_count"
+    echo " Services without requirements.txt: $skipped_count"
+
+  setup/comfyui: |
+    echo "========================================="
+    echo " Setting up ComfyUI"
+    echo "========================================="
+    echo ""
+
+    cd $COMFYUI_ROOT
+
+    # Activate ComfyUI venv
+    if [ ! -d "venv" ]; then
+      echo "❌ ERROR: ComfyUI venv not found. Run setup/services first!"
+      exit 1
+    fi
+
+    source venv/bin/activate
+
+    # Install common extension dependencies
+    echo "Installing common extension dependencies..."
+    pip install GitPython opencv-python-headless diffusers insightface onnxruntime
+
+    # Create model directory structure (relative path: cwd is already $COMFYUI_ROOT)
+    echo "Creating ComfyUI model directories..."
+    mkdir -p models/{checkpoints,unet,vae,loras,clip,clip_vision,controlnet,ipadapter,embeddings,upscale_models,video_models,animatediff_models,animatediff_motion_lora,audio_models,configs,diffusers,diffusion_models,musicgen}
+
+    # Install dependencies for each custom node
+    echo "Installing dependencies for all custom nodes..."
+    cd custom_nodes
+
+    installed_count=0
+    skipped_count=0
+
+    for dir in */; do
+      if [ -f "${dir}requirements.txt" ]; then
+        echo ""
+        echo "📦 Installing ${dir%/} dependencies..."
+        if pip install -r "${dir}requirements.txt"; then
+          installed_count=$((installed_count + 1))
+          echo " ✓ ${dir%/} dependencies installed"
+        else
+          echo " ⚠ Warning: Some dependencies for ${dir%/} may have failed"
+          installed_count=$((installed_count + 1))
+        fi
+      else
+        skipped_count=$((skipped_count + 1))
+      fi
+    done
+
+    deactivate
+
+    echo ""
+    echo "✓ Extension dependencies installation complete"
+    echo " Extensions with dependencies: $installed_count"
+    echo " Extensions without requirements.txt: $skipped_count"
+
+  models/download: |
+    artifact_civitai_download.sh download -c ./models/models_civitai.yaml --cache-dir ./.cache/civitai
+    artifact_huggingface_download.sh download -c ./models/models_huggingface.yaml --cache-dir ./.cache/huggingface
+
+  models/link: |
+    artifact_civitai_download.sh link -c ./models/models_civitai.yaml --cache-dir ./.cache/civitai --output-dir ./services/comfyui/models
+    artifact_huggingface_download.sh link -c ./models/models_huggingface.yaml --cache-dir ./.cache/huggingface --output-dir ./services/comfyui/models
+
+  #
+  # Supervisor Control Scripts
+  #
+  supervisor/start: |
+    supervisord -c supervisord.conf
+
+  supervisor/stop: |
+    supervisorctl -c supervisord.conf shutdown
+
+  supervisor/status: |
+    supervisorctl -c supervisord.conf status
+
+  supervisor/restart: |
+    supervisorctl -c supervisord.conf restart all
diff --git a/models/models_civitai.yaml b/models/models_civitai.yaml
new file mode 100644
index 0000000..ceb0c42
--- /dev/null
+++ b/models/models_civitai.yaml
@@ -0,0 +1,134 @@
+- repo_id: dreamshaper-v8
+  version_id: "128713"
+  model_id: "4384"
+  description: "DreamShaper - V∞!"
+  category: image
+  type: checkpoints
+  format: safetensors
+
+- repo_id: lustify-v7-ggwp
+  version_id: "1094291"
+  model_id: "573152"
+  description: "LUSTIFY v7.0 GGWP - Photoreal NSFW checkpoint for women in sexual scenarios"
+  category: image
+  type: checkpoints
+  format: safetensors
+
+- repo_id: pony-diffusion-v6-xl
+  version_id: "135867"  # NOTE(review): same version_id as add-detail-xl in this file; one is likely wrong - verify
+  model_id: "257749"
+  description: "Pony Diffusion V6 XL - Versatile anime/cartoon/furry model with balanced content"
+  category: image
+  type: checkpoints
+  format: safetensors
+
+- repo_id: realvisxl-v5
+  version_id: "798204"
+  model_id: "139562"
+  description: "RealVisXL V5.0 Lightning - Photorealistic model for high-quality realistic images"
+  category: image
+  type: checkpoints
+  format: safetensors
+
+- repo_id: wai-nsfw-illustrious-sdxl
+  version_id: "2167369"
+  model_id: "827184"
+  description: "WAI-NSFW-illustrious-SDXL v15.0 - Actively updated NSFW Illustrious variant"
+  category: image
+  type: checkpoints
+  format: safetensors
+
+- repo_id: talmendoxl
+  version_id: "131869"
+  model_id: "119202"
+  description: "TalmendoXL - Uncensored SDXL model biased towards photorealism"
+  category: image
+  type: checkpoints
+  format: safetensors
+
+- repo_id: big-lust-v1-6
+  version_id: "1081768"
+  model_id: "575395"
+  category: image
+  type: checkpoints
+  format: safetensors
+
+- repo_id: baddream
+  version_id: "77169"
+  model_id: "72437"
+  description: "BadDream v1.0 - Negative embedding for dreamshaper style artifacts"
+  category: image
+  type: embeddings
+  format: pt
+
+- repo_id: unrealisticdream
+  version_id: "77173"
+  model_id: "72437"
+  description: "UnrealisticDream v1.0 - Improves realistic images, pairs with BadDream"
+  category: image
+  type: embeddings
+  format: pt
+
+- repo_id: badhandv4
+  version_id: "20068"
+  model_id: "16993"
+  description: "badhandv4 - Improves hand details with minimal style impact"
+  category: image
+  type: embeddings
+  format: pt
+
+- repo_id: easynegative
+  version_id: "9208"
+  model_id: "7808"
+  description: "EasyNegative - General-purpose negative embedding"
+  category: image
+  type: embeddings
+  format: safetensors
+
+- repo_id: fastnegativev2
+  version_id: "94057"
+  model_id: "71961"
+  description: "FastNegativeV2 - Token mix of common negative prompts"
+  category: image
+  type: embeddings
+  format: pt
+
+- repo_id: badneganatomy
+  version_id: "64063"
+  model_id: "59614"
+  description: "BadNegAnatomyV1-neg - Improves aesthetics and character anatomy"
+  category: image
+  type: embeddings
+  format: pt
+
+- repo_id: badx-sdxl
+  version_id: "981304"
+  model_id: "122403"
+  description: "Bad X v1.1 - SDXL negative embedding for anatomy and realism"
+  category: image
+  type: embeddings
+  format: pt
+
+- repo_id: pony-pdxl-hq-v3
+  version_id: "720175"
+  model_id: "332646"
+  description: "Pony PDXL High Quality V3 (zPDXL3) - Quality enhancer for Pony models"
+  category: image
+  type: embeddings
+  format: safetensors
+
+- repo_id: pony-pdxl-xxx
+  version_id: "380277"
+  model_id: "332646"
+  description: "Pony PDXL XXX Rating (zPDXLxxx) - Enables NSFW content"
+  category: image
+  type: embeddings
+  format: pt
+
+- repo_id: add-detail-xl
+  version_id: "135867"  # NOTE(review): same version_id as pony-diffusion-v6-xl in this file; one is likely wrong - verify
+  model_id: "122359"
+  description: "Add Detail XL - Detail enhancement LoRA for SDXL"
+  category: image
+  type: loras
+  format: safetensors
diff --git a/models/models_huggingface.yaml b/models/models_huggingface.yaml
new file mode 100644
index 0000000..69bedeb
--- /dev/null
+++ b/models/models_huggingface.yaml
@@ -0,0 +1,137 @@
+- repo_id: black-forest-labs/FLUX.1-schnell
+  description: FLUX.1 Schnell - Fast 4-step inference
+  category: image
+  files:
+    - source: "flux1-schnell.safetensors"
+      dest: "unet/flux1-schnell.safetensors"
+    - source: "ae.safetensors"
+      dest: "vae/ae.safetensors"
+
+- repo_id: black-forest-labs/FLUX.1-dev
+  description: FLUX.1 Dev - Balanced quality/speed
+  category: image
+  files:
+    - source: "flux1-dev.safetensors"
+      dest: "unet/flux1-dev.safetensors"
+
+- repo_id: runwayml/stable-diffusion-v1-5
+  description: SD 1.5 - For AnimateDiff
+  category: image
+  files:
+    - source: "v1-5-pruned-emaonly.safetensors"
+      dest: "checkpoints/v1-5-pruned-emaonly.safetensors"
+
+- repo_id: stabilityai/stable-diffusion-xl-base-1.0
+  description: SDXL Base 1.0 - Industry standard
+  category: image
+  files:
+    - source: "sd_xl_base_1.0.safetensors"
+      dest: "checkpoints/sd_xl_base_1.0.safetensors"
+
+- repo_id: stabilityai/stable-diffusion-xl-refiner-1.0
+  description: SDXL Refiner 1.0 - Enhances base output
+  category: image
+  files:
+    - source: "sd_xl_refiner_1.0.safetensors"
+      dest: "checkpoints/sd_xl_refiner_1.0.safetensors"
+
+- repo_id: stabilityai/stable-diffusion-3.5-large
+  description: SD 3.5 Large - Latest Stability AI
+  category: image
+  files:
+    - source: "sd3.5_large.safetensors"
+      dest: "checkpoints/sd3.5_large.safetensors"
+    - source: "text_encoders/t5xxl_fp16.safetensors"
+      dest: "text_encoders/t5xxl_fp16.safetensors"
+    - source: "text_encoders/clip_l.safetensors"
+      dest: "text_encoders/clip_l.safetensors"
+    - source: "text_encoders/clip_g.safetensors"
+      dest: "text_encoders/clip_g.safetensors"
+
+- repo_id: THUDM/CogVideoX-5b
+  description: CogVideoX-5B - Professional text-to-video
+  category: video
+  files:
+    - source: "transformer/diffusion_pytorch_model-00001-of-00002.safetensors"
+      dest: "diffusion_models/cogvideox-5b-transformer-00001-of-00002.safetensors"
+    - source: "transformer/diffusion_pytorch_model-00002-of-00002.safetensors"
+      dest: "diffusion_models/cogvideox-5b-transformer-00002-of-00002.safetensors"
+    - source: "transformer/diffusion_pytorch_model.safetensors.index.json"
+      dest: "diffusion_models/cogvideox-5b-transformer.safetensors.index.json"
+
+- repo_id: THUDM/CogVideoX-5b-I2V
+  description: CogVideoX-5B-I2V - Image-to-video generation
+  category: video
+  files:
+    - source: "transformer/diffusion_pytorch_model-00001-of-00003.safetensors"
+      dest: "diffusion_models/cogvideox-5b-i2v-transformer-00001-of-00003.safetensors"
+    - source: "transformer/diffusion_pytorch_model-00002-of-00003.safetensors"
+      dest: "diffusion_models/cogvideox-5b-i2v-transformer-00002-of-00003.safetensors"
+    - source: "transformer/diffusion_pytorch_model-00003-of-00003.safetensors"
+      dest: "diffusion_models/cogvideox-5b-i2v-transformer-00003-of-00003.safetensors"
+    - source: "transformer/diffusion_pytorch_model.safetensors.index.json"
+      dest: "diffusion_models/cogvideox-5b-i2v-transformer.safetensors.index.json"
+
+- repo_id: stabilityai/stable-video-diffusion-img2vid
+  description: SVD - 14 frame image-to-video
+  category: video
+  files:
+    - source: "svd.safetensors"
+      dest: "checkpoints/svd.safetensors"
+
+- repo_id: stabilityai/stable-video-diffusion-img2vid-xt
+  description: SVD-XT - 25 frame image-to-video
+  category: video
+  files:
+    - source: "svd_xt.safetensors"
+      dest: "checkpoints/svd_xt.safetensors"
+
+- repo_id: openai/clip-vit-large-patch14  # NOTE(review): this is a ViT-L/14 repo but the description and dest name a ViT-H-14 laion2B model - verify
+  description: CLIP H - For SD 1.5 IP-Adapter
+  category: video
+  files:
+    - source: "model.safetensors"
+      dest: "clip_vision/CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors"
+
+- repo_id: laion/CLIP-ViT-bigG-14-laion2B-39B-b160k
+  description: CLIP G - For SDXL IP-Adapter
+  category: video
+  files:
+    - source: "open_clip_model.safetensors"
+      dest: "clip_vision/CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors"
+
+- repo_id: google/siglip-so400m-patch14-384
+  description: SigLIP - For FLUX models
+  category: video
+  files:
+    - source: "model.safetensors"
+      dest: "clip_vision/siglip-so400m-patch14-384.safetensors"
+
+- repo_id: ai-forever/Real-ESRGAN
+  description: RealESRGAN upscaling models
+  category: upscale
+  files:
+    - source: "RealESRGAN_x2.pth"
+      dest: "upscale_models/RealESRGAN_x2.pth"
+    - source: "RealESRGAN_x4.pth"
+      dest: "upscale_models/RealESRGAN_x4.pth"
+
+- repo_id: guoyww/animatediff
+  description: AnimateDiff Motion Modules
+  category: video
+  files:
+    - source: "mm_sd_v15_v2.ckpt"
+      dest: "animatediff_models/mm_sd_v15_v2.ckpt"
+
+- repo_id: h94/IP-Adapter
+  description: IP-Adapter
+  category: video
+  files:
+    - source: "sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors"
+      dest: "ipadapter/ip-adapter-plus_sdxl_vit-h.safetensors"
+    - source: "sdxl_models/ip-adapter-plus-face_sdxl_vit-h.safetensors"
+      dest: "ipadapter/ip-adapter-plus-face_sdxl_vit-h.safetensors"
+    - source: "sdxl_models/ip-adapter_sdxl_vit-h.safetensors"
+      dest: "ipadapter/ip-adapter_sdxl_vit-h.safetensors"
+    - source: "sdxl_models/ip-adapter_sdxl.safetensors"
+      dest: "ipadapter/ip-adapter_sdxl.safetensors"
diff --git a/services/vllm/config_bge.yaml b/services/vllm/config_bge.yaml
new file mode 100644
index 0000000..cae1465
--- /dev/null
+++ b/services/vllm/config_bge.yaml
@@ -0,0 +1,4 @@
+model: BAAI/bge-large-en-v1.5
+host: "0.0.0.0"
+port: 8002
+uvicorn-log-level: "info"
diff --git a/services/vllm/config_llama.yaml b/services/vllm/config_llama.yaml
new file mode 100644
index 0000000..ddc68a1
--- /dev/null
+++ b/services/vllm/config_llama.yaml
@@ -0,0 +1,4 @@
+model: meta-llama/Llama-3.1-8B-Instruct
+host: "0.0.0.0"
+port: 8001
+uvicorn-log-level: "info"
diff --git a/services/vllm/requirements.txt b/services/vllm/requirements.txt
new file mode 100644
index 0000000..e7a6c77
--- /dev/null
+++ b/services/vllm/requirements.txt
@@ -0,0 +1 @@
+vllm
diff --git a/services/webdav-sync/requirements.txt b/services/webdav-sync/requirements.txt
new file mode 100644
index 0000000..0b2548a
--- /dev/null
+++ b/services/webdav-sync/requirements.txt
@@ -0,0 +1,2 @@
+watchdog==3.0.0
+webdavclient3==3.14.6
diff --git a/services/webdav-sync/webdav_sync.py b/services/webdav-sync/webdav_sync.py
new file mode 100644
index 0000000..7973b99
--- /dev/null
+++ b/services/webdav-sync/webdav_sync.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python3
+"""
+ComfyUI WebDAV Output Sync Service
+
+Monitors ComfyUI's output directory and automatically uploads new files
+to HiDrive WebDAV storage.
+ +Usage: + python3 comfyui_webdav_sync.py + +Environment Variables: + WEBDAV_URL: WebDAV server URL (default: https://webdav.hidrive.ionos.com/) + WEBDAV_USERNAME: WebDAV username + WEBDAV_PASSWORD: WebDAV password + WEBDAV_REMOTE_PATH: Remote directory path (default: /users/valknar/Pictures/AI/ComfyUI) + COMFYUI_OUTPUT_DIR: Local directory to watch (default: /workspace/ComfyUI/output) + SYNC_DELAY: Seconds to wait after file write before upload (default: 2) +""" + +import os +import sys +import time +import logging +from pathlib import Path +from typing import Set +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler, FileCreatedEvent, FileModifiedEvent +from webdav3.client import Client + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(sys.stdout) + ] +) +logger = logging.getLogger('comfyui-webdav-sync') + +# Configuration from environment variables +WEBDAV_URL = os.getenv('WEBDAV_URL') +WEBDAV_USERNAME = os.getenv('WEBDAV_USERNAME') +WEBDAV_PASSWORD = os.getenv('WEBDAV_PASSWORD') +WEBDAV_REMOTE_PATH = os.getenv('WEBDAV_REMOTE_PATH') +COMFYUI_OUTPUT_DIR = os.getenv('COMFYUI_OUTPUT_DIR', '/workspace/ComfyUI/output') +SYNC_DELAY = int(os.getenv('SYNC_DELAY', '2')) # Wait 2 seconds after file write +RETRY_ATTEMPTS = int(os.getenv('RETRY_ATTEMPTS', '3')) +RETRY_DELAY = int(os.getenv('RETRY_DELAY', '5')) + +# Validate required credentials +if not all([WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD, WEBDAV_REMOTE_PATH]): + logger.error("Missing required WebDAV environment variables!") + logger.error("Required: WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD, WEBDAV_REMOTE_PATH") + sys.exit(1) + + +class WebDAVClient: + """WebDAV client wrapper with retry logic""" + + def __init__(self): + options = { + 'webdav_hostname': WEBDAV_URL, + 'webdav_login': WEBDAV_USERNAME, + 'webdav_password': WEBDAV_PASSWORD, + 
'webdav_timeout': 300, # 5 minutes for large files + } + self.client = Client(options) + self._ensure_remote_directory() + + def _ensure_remote_directory(self): + """Ensure the remote directory exists""" + try: + if not self.client.check(WEBDAV_REMOTE_PATH): + logger.info(f"Creating remote directory: {WEBDAV_REMOTE_PATH}") + # Create parent directories recursively + parts = Path(WEBDAV_REMOTE_PATH).parts + current = '' + for part in parts: + if not part or part == '/': + continue + current = f"{current}/{part}" + if not self.client.check(current): + self.client.mkdir(current) + logger.info(f"āœ“ Remote directory ready: {WEBDAV_REMOTE_PATH}") + except Exception as e: + logger.error(f"Failed to create remote directory: {e}") + raise + + def upload_file(self, local_path: str, remote_path: str) -> bool: + """Upload a file with retry logic""" + for attempt in range(1, RETRY_ATTEMPTS + 1): + try: + # Ensure parent directory exists + remote_dir = str(Path(remote_path).parent) + if not self.client.check(remote_dir): + self.client.mkdir(remote_dir) + + # Upload file + logger.info(f"[{attempt}/{RETRY_ATTEMPTS}] Uploading {Path(local_path).name} -> {remote_path}") + self.client.upload_sync(remote_path=remote_path, local_path=local_path) + + # Verify upload + if self.client.check(remote_path): + file_size = os.path.getsize(local_path) + logger.info(f"āœ“ Upload successful: {Path(local_path).name} ({file_size:,} bytes)") + return True + else: + logger.warning(f"Upload verification failed for {remote_path}") + + except Exception as e: + logger.error(f"Upload attempt {attempt} failed: {e}") + if attempt < RETRY_ATTEMPTS: + logger.info(f"Retrying in {RETRY_DELAY} seconds...") + time.sleep(RETRY_DELAY) + + logger.error(f"āœ— Failed to upload {local_path} after {RETRY_ATTEMPTS} attempts") + return False + + +class ComfyUIOutputHandler(FileSystemEventHandler): + """Handles file system events in ComfyUI output directory""" + + def __init__(self, webdav_client: WebDAVClient): + 
self.webdav_client = webdav_client + self.pending_files: Set[str] = set() # Files waiting for write completion + self.uploaded_files: Set[str] = set() # Track uploaded files + self.watch_dir = Path(COMFYUI_OUTPUT_DIR) + + def on_created(self, event): + """Handle file creation events""" + if event.is_directory: + return + + file_path = event.src_path + + # Ignore temp files and hidden files + if self._should_ignore(file_path): + return + + logger.info(f"New file detected: {Path(file_path).name}") + self.pending_files.add(file_path) + + # Schedule upload after delay (to ensure file write is complete) + self._schedule_upload(file_path) + + def on_modified(self, event): + """Handle file modification events""" + if event.is_directory: + return + + file_path = event.src_path + + # Ignore if already uploaded or should be ignored + if file_path in self.uploaded_files or self._should_ignore(file_path): + return + + # Add to pending if not already there + if file_path not in self.pending_files: + logger.info(f"Modified file detected: {Path(file_path).name}") + self.pending_files.add(file_path) + self._schedule_upload(file_path) + + def _should_ignore(self, file_path: str) -> bool: + """Check if file should be ignored""" + name = Path(file_path).name + + # Ignore hidden files, temp files, and partial downloads + if name.startswith('.') or name.endswith('.tmp') or name.endswith('.part'): + return True + + return False + + def _schedule_upload(self, file_path: str): + """Schedule file upload after ensuring write is complete""" + def upload_when_ready(): + time.sleep(SYNC_DELAY) # Wait for file write to complete + + # Verify file still exists and size is stable + if not os.path.exists(file_path): + logger.warning(f"File disappeared: {file_path}") + self.pending_files.discard(file_path) + return + + # Check if file size is stable (not being written) + size1 = os.path.getsize(file_path) + time.sleep(0.5) + size2 = os.path.getsize(file_path) + + if size1 != size2: + 
logger.info(f"File still being written: {Path(file_path).name}") + # Reschedule + self._schedule_upload(file_path) + return + + # Upload file + self._upload_file(file_path) + + # Run in background thread + import threading + threading.Thread(target=upload_when_ready, daemon=True).start() + + def _upload_file(self, local_path: str): + """Upload local_path to the matching path under WEBDAV_REMOTE_PATH and record the outcome""" + try: + # Calculate relative path from watch directory + rel_path = Path(local_path).relative_to(self.watch_dir) + remote_path = f"{WEBDAV_REMOTE_PATH}/{rel_path}".replace('\\', '/') + + # Upload + success = self.webdav_client.upload_file(local_path, remote_path) + + # Track result + self.pending_files.discard(local_path) + if success: + self.uploaded_files.add(local_path) + + except Exception as e: + logger.error(f"Failed to upload {local_path}: {e}") + self.pending_files.discard(local_path) + + +def main(): + """Log the configuration, ensure the watch directory exists, then watch it until interrupted""" + logger.info("=" * 80) + logger.info("ComfyUI WebDAV Output Sync Service") + logger.info("=" * 80) + logger.info(f"Watch directory: {COMFYUI_OUTPUT_DIR}") + logger.info(f"WebDAV URL: {WEBDAV_URL}") + logger.info(f"Remote path: {WEBDAV_REMOTE_PATH}") + logger.info(f"Sync delay: {SYNC_DELAY}s") + logger.info("=" * 80) + + # Create the watch directory if it is missing (recoverable, so logged as a warning) + if not os.path.exists(COMFYUI_OUTPUT_DIR): + logger.warning(f"Watch directory does not exist: {COMFYUI_OUTPUT_DIR}") + logger.info("Creating watch directory...") + os.makedirs(COMFYUI_OUTPUT_DIR, exist_ok=True) + + # Initialize WebDAV client + try: + webdav_client = WebDAVClient() + except Exception as e: + logger.error(f"Failed to initialize WebDAV client: {e}") + sys.exit(1) + + # Create event handler and observer + event_handler = ComfyUIOutputHandler(webdav_client) + observer = Observer() + observer.schedule(event_handler, COMFYUI_OUTPUT_DIR, recursive=True) + + # Start watching + observer.start() + logger.info("✓ Service started - watching for new files...") + + try: + while True: + time.sleep(1) + except 
KeyboardInterrupt: + logger.info("Shutting down...") + observer.stop() + + observer.join() + logger.info("Service stopped") + + +if __name__ == '__main__': + main() diff --git a/start.sh b/start.sh new file mode 100644 index 0000000..3239bea --- /dev/null +++ b/start.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# RunPod container startup script +# This script initializes the container environment and starts all services + +set -e + +echo "========================================" +echo " RunPod AI Orchestrator - Starting" +echo "========================================" + +# [1/4] Start SSH server (required by RunPod) +echo "[1/4] Starting SSH server..." +mkdir -p ~/.ssh +chmod 700 ~/.ssh +echo "$PUBLIC_KEY" >> ~/.ssh/authorized_keys +chmod 600 ~/.ssh/authorized_keys +service ssh start +echo " ✓ SSH server started" + +# [2/4] Cloning repositories +echo "[2/4] Cloning repositories..." +ssh-keyscan -p 2222 -t rsa dev.pivoine.art >> ~/.ssh/known_hosts +chmod 644 ~/.ssh/known_hosts +git clone https://dev.pivoine.art/valknar/bin.git "$PWD/bin" +git clone https://dev.pivoine.art/valknar/runpod-ai-orchestrator.git "$PWD/orchestrator" +echo " ✓ Repositories cloned" + +# [3/4] Add $PWD/bin to PATH for arty and custom scripts +echo "[3/4] Configuring PATH..." +ln -sf "$PWD/bin/aritfact_git_download.sh" "$PWD/bin/arty" +export PATH="$PWD/bin:$PATH" +echo " ✓ PATH updated: $PWD/bin added" + +# [4/4] Setting up with arty +echo "[4/4] Setting up with arty..." +cd "$PWD/orchestrator" +arty setup +cd - +echo " ✓ Setup complete" + +# Display connection information +echo "" +echo "========================================" +echo " Container Ready" +echo "========================================" +echo "Services:" +echo " - SSH: port 22" +if [ -n "${TAILSCALE_IP:-}" ] && [ "$TAILSCALE_IP" != "not connected" ]; then + echo " - Tailscale IP: $TAILSCALE_IP" +fi +echo "" +echo "========================================" + +# Keep container running +echo "Container is running. 
Press Ctrl+C to stop." +sleep infinity diff --git a/supervisord.conf b/supervisord.conf new file mode 100644 index 0000000..4810dc4 --- /dev/null +++ b/supervisord.conf @@ -0,0 +1,120 @@ +[supervisord] +logfile=.logs/supervisord.log +pidfile=supervisord.pid +childlogdir=.logs +nodaemon=false +loglevel=info + +[unix_http_server] +file=supervisor.sock +chmod=0700 + +[supervisorctl] +serverurl=unix://supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +# Web interface for monitoring (proxied via nginx + Authelia) +[inet_http_server] +port=0.0.0.0:9001 +# Authentication disabled here (no username/password) - handled by Authelia SSO. NOTE(review): the listener binds 0.0.0.0, so confirm port 9001 is reachable only through the proxy + +# ComfyUI Server (listens on 0.0.0.0:8188) +[program:comfyui] +command=services/comfyui/venv/bin/python services/comfyui/main.py --listen 0.0.0.0 --port 8188 --enable-cors-header --preview-method auto +directory=. +autostart=true +autorestart=true +startretries=3 +stderr_logfile=.logs/comfyui.err.log +stdout_logfile=.logs/comfyui.out.log +stdout_logfile_maxbytes=50MB +stdout_logfile_backups=10 +stderr_logfile_maxbytes=50MB +stderr_logfile_backups=10 +environment=PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True",TQDM_DISABLE="1" +priority=100 +stopwaitsecs=30 + + +# ComfyUI WebDAV Sync Service. NOTE(review): COMFYUI_OUTPUT_DIR below is "../comfyui/output" while directory is "." and ComfyUI runs from services/comfyui - confirm it resolves to ComfyUI's output folder +[program:webdav-sync] +command=services/webdav-sync/venv/bin/python services/webdav-sync/webdav_sync.py +directory=. 
+autostart=true +autorestart=true +startretries=3 +stderr_logfile=.logs/webdav-sync.err.log +stdout_logfile=.logs/webdav-sync.out.log +stdout_logfile_maxbytes=50MB +stdout_logfile_backups=10 +stderr_logfile_maxbytes=50MB +stderr_logfile_backups=10 +environment=WEBDAV_URL="%(ENV_WEBDAV_URL)s",WEBDAV_USERNAME="%(ENV_WEBDAV_USERNAME)s",WEBDAV_PASSWORD="%(ENV_WEBDAV_PASSWORD)s",WEBDAV_REMOTE_PATH="%(ENV_WEBDAV_REMOTE_PATH)s",COMFYUI_OUTPUT_DIR="../comfyui/output" +priority=150 +stopwaitsecs=10 + + +# vLLM Llama 3.1 8B Server (Port 8001) - autostart=false, so it is not launched with supervisord. NOTE(review): %(ENV_HF_TOKEN)s presumably requires HF_TOKEN in supervisord's own environment - confirm it is always set +[program:llama] +command=services/vllm/venv/bin/vllm serve --config services/vllm/config_llama.yaml +directory=. +autostart=false +autorestart=true +startretries=3 +stderr_logfile=.logs/llama.err.log +stdout_logfile=.logs/llama.out.log +stdout_logfile_maxbytes=50MB +stdout_logfile_backups=10 +stderr_logfile_maxbytes=50MB +stderr_logfile_backups=10 +environment=HF_TOKEN="%(ENV_HF_TOKEN)s" +priority=200 +stopwaitsecs=30 + +# vLLM BGE Embedding Server (Port 8002) - autostart=false, so it is not launched with supervisord +[program:bge] +command=services/vllm/venv/bin/vllm serve --config services/vllm/config_bge.yaml +directory=. +autostart=false +autorestart=true +startretries=3 +stderr_logfile=.logs/bge.err.log +stdout_logfile=.logs/bge.out.log +stdout_logfile_maxbytes=50MB +stdout_logfile_backups=10 +stderr_logfile_maxbytes=50MB +stderr_logfile_backups=10 +environment=HF_TOKEN="%(ENV_HF_TOKEN)s" +priority=201 +stopwaitsecs=30 + + +# AudioCraft Studio Service +[program:audiocraft] +command=services/audiocraft/venv/bin/python services/audiocraft/main.py +directory=. 
+autostart=true +autorestart=true +startretries=3 +stderr_logfile=.logs/audiocraft.err.log +stdout_logfile=.logs/audiocraft.out.log +stdout_logfile_maxbytes=50MB +stdout_logfile_backups=10 +stderr_logfile_maxbytes=50MB +stderr_logfile_backups=10 +priority=300 +stopwaitsecs=10 + +[group:comfyui] +programs=comfyui,webdav-sync +priority=100 + +[group:vllm] +programs=llama,bge +priority=200 + +[group:audiocraft] +programs=audiocraft +priority=300