refactor: consolidate model management into Ansible playbook

Remove flux/musicgen standalone implementations in favor of ComfyUI:
- Delete models/flux/ and models/musicgen/ directories
- Remove redundant scripts (install.sh, download-models.sh, prepare-template.sh)
- Update README.md to reference Ansible playbook commands
- Update playbook.yml to remove flux/musicgen service definitions
- Add COMFYUI_MODELS.md with comprehensive model installation guide
- Update stop-all.sh to only manage orchestrator and vLLM services

All model downloads and dependency management now handled via
Ansible playbook tags (base, python, vllm, comfyui, comfyui-essential).
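
A minimal sketch of driving this per component from Python (the playbook
path, the helper name, and running against localhost defaults are
assumptions; the tag names are the ones listed above):

    # Hypothetical helper: run the playbook restricted to a subset of tags.
    import subprocess

    def run_playbook(tags: list[str], playbook: str = "playbook.yml") -> None:
        """Invoke ansible-playbook, executing only tasks matching the tags."""
        subprocess.run(
            ["ansible-playbook", playbook, "--tags", ",".join(tags)],
            check=True,  # raise CalledProcessError if the playbook fails
        )

    # e.g. install ComfyUI plus only the essential models
    run_playbook(["comfyui", "comfyui-essential"])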

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 00:31:26 +01:00
parent 7edf17551a
commit c9b01eef68
12 changed files with 774 additions and 874 deletions


@@ -1,21 +0,0 @@
# Flux.1 Image Generation Service Dependencies
# Diffusers library (for Flux.1 pipeline)
diffusers==0.30.0
# PyTorch (required by diffusers)
torch==2.1.0
torchvision==0.16.0
# Transformers (for model components)
transformers==4.36.0
# Image processing
Pillow==10.1.0
# Accelerate (for optimizations)
accelerate==0.25.0
# Additional dependencies for Flux
sentencepiece==0.1.99
protobuf==4.25.1


@@ -1,193 +0,0 @@
#!/usr/bin/env python3
"""
Flux.1 Image Generation Service
OpenAI-compatible image generation using Flux.1 Schnell model.
Provides /v1/images/generations endpoint.
"""
import base64
import io
import os
import time
from typing import Optional
import torch
from diffusers import FluxPipeline
from fastapi import HTTPException
from PIL import Image
from pydantic import BaseModel, Field
# Import base service class
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))
from core.base_service import GPUService

class ImageGenerationRequest(BaseModel):
    """Image generation request (OpenAI-compatible)"""
    model: str = Field(default="flux-schnell", description="Model name")
    prompt: str = Field(..., description="Text description of the image to generate")
    n: int = Field(default=1, ge=1, le=4, description="Number of images to generate")
    size: str = Field(default="1024x1024", description="Image size (e.g., 512x512, 1024x1024)")
    response_format: str = Field(default="b64_json", description="Response format: url or b64_json")
    quality: str = Field(default="standard", description="Image quality: standard or hd")
    style: str = Field(default="natural", description="Image style: natural or vivid")


class ImageGenerationResponse(BaseModel):
    """Image generation response (OpenAI-compatible)"""
    created: int = Field(..., description="Unix timestamp")
    data: list = Field(..., description="List of generated images")

class FluxService(GPUService):
    """Flux.1 Schnell image generation service"""

    def __init__(self):
        # Get port from environment or use default
        port = int(os.getenv("PORT", "8002"))
        super().__init__(name="flux-schnell", port=port)
        # Service-specific attributes
        self.pipeline: Optional[FluxPipeline] = None
        self.model_name = os.getenv("MODEL_NAME", "black-forest-labs/FLUX.1-schnell")

    async def initialize(self):
        """Initialize Flux.1 pipeline"""
        await super().initialize()
        self.logger.info(f"Loading Flux.1 pipeline: {self.model_name}")
        # Load pipeline
        self.pipeline = FluxPipeline.from_pretrained(
            self.model_name,
            torch_dtype=torch.bfloat16,
            cache_dir=os.getenv("HF_CACHE_DIR", "/workspace/huggingface_cache")
        )
        # Move to GPU
        if torch.cuda.is_available():
            self.pipeline = self.pipeline.to("cuda")
            self.logger.info("Flux.1 pipeline loaded on GPU")
        else:
            self.logger.warning("GPU not available, running on CPU (very slow)")
        # Enable memory optimizations
        if hasattr(self.pipeline, 'enable_model_cpu_offload'):
            # This moves models to GPU only when needed, saving VRAM
            self.pipeline.enable_model_cpu_offload()
        self.logger.info("Flux.1 pipeline initialized successfully")

    async def cleanup(self):
        """Cleanup resources"""
        await super().cleanup()
        if self.pipeline:
            self.logger.info("Flux.1 pipeline cleanup")
            self.pipeline = None

    def parse_size(self, size_str: str) -> tuple[int, int]:
        """Parse a size string like '1024x1024' into (width, height)."""
        try:
            parts = size_str.lower().split('x')
            if len(parts) != 2:
                return (1024, 1024)
            width = int(parts[0])
            height = int(parts[1])
            return (width, height)
        except ValueError:  # non-numeric parts fall back to the default size
            return (1024, 1024)

    def image_to_base64(self, image: Image.Image) -> str:
        """Convert PIL Image to base64 string"""
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_bytes = buffered.getvalue()
        return base64.b64encode(img_bytes).decode('utf-8')

    def create_app(self):
        """Create FastAPI routes"""

        @self.app.get("/")
        async def root():
            """Root endpoint"""
            return {
                "service": "Flux.1 Schnell Image Generation",
                "model": self.model_name,
                "max_images": 4
            }

        @self.app.get("/v1/models")
        async def list_models():
            """List available models (OpenAI-compatible)"""
            return {
                "object": "list",
                "data": [
                    {
                        "id": "flux-schnell",
                        "object": "model",
                        "created": 1234567890,
                        "owned_by": "black-forest-labs",
                        "permission": [],
                        "root": self.model_name,
                        "parent": None,
                    }
                ]
            }

        @self.app.post("/v1/images/generations")
        async def generate_image(request: ImageGenerationRequest) -> ImageGenerationResponse:
            """Generate images from text prompt (OpenAI-compatible)"""
            if not self.pipeline:
                raise HTTPException(status_code=503, detail="Model not initialized")
            self.logger.info(f"Generating {request.n} image(s): {request.prompt[:100]}...")
            try:
                # Parse image size
                width, height = self.parse_size(request.size)
                self.logger.info(f"Size: {width}x{height}")
                # Generate images
                images = []
                for i in range(request.n):
                    self.logger.info(f"Generating image {i+1}/{request.n}")
                    # Flux.1 Schnell uses 4 inference steps for speed
                    image = self.pipeline(
                        prompt=request.prompt,
                        width=width,
                        height=height,
                        num_inference_steps=4,  # Schnell is optimized for 4 steps
                        guidance_scale=0.0,  # Schnell doesn't use guidance
                    ).images[0]
                    # Convert to base64
                    if request.response_format == "b64_json":
                        image_data = {
                            "b64_json": self.image_to_base64(image)
                        }
                    else:
                        # For URL format, we'd need to save and serve the file
                        # For now, we'll return base64 anyway
                        image_data = {
                            "b64_json": self.image_to_base64(image)
                        }
                    images.append(image_data)
                self.logger.info(f"Generated {request.n} image(s) successfully")
                return ImageGenerationResponse(
                    created=int(time.time()),  # actual Unix timestamp, per the field description
                    data=images
                )
            except Exception as e:
                self.logger.error(f"Error generating image: {e}", exc_info=True)
                raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    service = FluxService()
    service.run()
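
For reference, the (now removed) endpoint could be exercised with a minimal
client like the sketch below; base URL, prompt, and output path are
illustrative, and port 8002 is the default from the code above:

    # Hypothetical client for the removed /v1/images/generations endpoint.
    import base64

    import requests

    resp = requests.post(
        "http://localhost:8002/v1/images/generations",
        json={"prompt": "a lighthouse at dusk", "n": 1, "size": "512x512"},
        timeout=300,  # image generation can take a while
    )
    resp.raise_for_status()
    b64 = resp.json()["data"][0]["b64_json"]  # response_format defaults to b64_json
    with open("out.png", "wb") as f:
        f.write(base64.b64decode(b64))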


@@ -1,11 +0,0 @@
# MusicGen Music Generation Service Dependencies
# AudioCraft (contains MusicGen)
audiocraft==1.3.0
# PyTorch (required by AudioCraft)
torch==2.1.0
torchaudio==2.1.0
# Additional dependencies
transformers==4.36.0


@@ -1,172 +0,0 @@
#!/usr/bin/env python3
"""
MusicGen Music Generation Service
OpenAI-compatible music generation using Meta's MusicGen Medium model.
Provides /v1/audio/generations endpoint.
"""
import base64
import io
import os
import tempfile
from typing import Optional
import torch
import torchaudio
from audiocraft.models import MusicGen
from fastapi import HTTPException
from pydantic import BaseModel, Field
# Import base service class
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))
from core.base_service import GPUService

class AudioGenerationRequest(BaseModel):
    """Music generation request"""
    model: str = Field(default="musicgen-medium", description="Model name")
    prompt: str = Field(..., description="Text description of the music to generate")
    duration: float = Field(default=30.0, ge=1.0, le=30.0, description="Duration in seconds")
    temperature: float = Field(default=1.0, ge=0.1, le=2.0, description="Sampling temperature")
    top_k: int = Field(default=250, ge=0, le=500, description="Top-k sampling")
    top_p: float = Field(default=0.0, ge=0.0, le=1.0, description="Top-p (nucleus) sampling")
    cfg_coef: float = Field(default=3.0, ge=1.0, le=15.0, description="Classifier-free guidance coefficient")
    response_format: str = Field(default="wav", description="Audio format (wav or mp3)")


class AudioGenerationResponse(BaseModel):
    """Music generation response"""
    audio: str = Field(..., description="Base64-encoded audio data")
    format: str = Field(..., description="Audio format (wav or mp3)")
    duration: float = Field(..., description="Duration in seconds")
    sample_rate: int = Field(..., description="Sample rate in Hz")

class MusicGenService(GPUService):
    """MusicGen music generation service"""

    def __init__(self):
        # Get port from environment or use default
        port = int(os.getenv("PORT", "8003"))
        super().__init__(name="musicgen-medium", port=port)
        # Service-specific attributes
        self.model: Optional[MusicGen] = None
        self.model_name = os.getenv("MODEL_NAME", "facebook/musicgen-medium")

    async def initialize(self):
        """Initialize MusicGen model"""
        await super().initialize()
        self.logger.info(f"Loading MusicGen model: {self.model_name}")
        # Load model
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = MusicGen.get_pretrained(self.model_name, device=device)
        self.logger.info("MusicGen model loaded successfully")
        self.logger.info(f"Max duration: 30 seconds at {self.model.sample_rate}Hz")

    async def cleanup(self):
        """Cleanup resources"""
        await super().cleanup()
        if self.model:
            self.logger.info("MusicGen model cleanup")
            self.model = None

    def create_app(self):
        """Create FastAPI routes"""

        @self.app.get("/")
        async def root():
            """Root endpoint"""
            return {
                "service": "MusicGen API Server",
                "model": self.model_name,
                "max_duration": 30.0,
                "sample_rate": self.model.sample_rate if self.model else 32000
            }

        @self.app.get("/v1/models")
        async def list_models():
            """List available models (OpenAI-compatible)"""
            return {
                "object": "list",
                "data": [
                    {
                        "id": "musicgen-medium",
                        "object": "model",
                        "created": 1234567890,
                        "owned_by": "meta",
                        "permission": [],
                        "root": self.model_name,
                        "parent": None,
                    }
                ]
            }

        @self.app.post("/v1/audio/generations")
        async def generate_audio(request: AudioGenerationRequest) -> AudioGenerationResponse:
            """Generate music from text prompt"""
            if not self.model:
                raise HTTPException(status_code=503, detail="Model not initialized")
            self.logger.info(f"Generating music: {request.prompt[:100]}...")
            self.logger.info(f"Duration: {request.duration}s, Temperature: {request.temperature}")
            try:
                # Set generation parameters
                self.model.set_generation_params(
                    duration=request.duration,
                    temperature=request.temperature,
                    top_k=request.top_k,
                    top_p=request.top_p,
                    cfg_coef=request.cfg_coef,
                )
                # Generate audio
                descriptions = [request.prompt]
                with torch.no_grad():
                    wav = self.model.generate(descriptions)
                # wav shape: [batch_size, channels, samples]
                # Extract first batch item
                audio_data = wav[0].cpu()  # [channels, samples]
                # Get sample rate
                sample_rate = self.model.sample_rate
                # Save to temporary file
                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                    temp_path = temp_file.name
                torchaudio.save(temp_path, audio_data, sample_rate)
                # Read audio file and encode to base64
                with open(temp_path, 'rb') as f:
                    audio_bytes = f.read()
                # Clean up temporary file
                os.unlink(temp_path)
                # Encode to base64
                audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
                self.logger.info(f"Generated {request.duration}s of audio")
                return AudioGenerationResponse(
                    audio=audio_base64,
                    format="wav",
                    duration=request.duration,
                    sample_rate=sample_rate
                )
            except Exception as e:
                self.logger.error(f"Error generating audio: {e}")
                raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    service = MusicGenService()
    service.run()
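
Likewise, a minimal client sketch for the removed music endpoint; base URL,
prompt, and output path are illustrative, and port 8003 is the default from
the code above:

    # Hypothetical client for the removed /v1/audio/generations endpoint.
    import base64

    import requests

    resp = requests.post(
        "http://localhost:8003/v1/audio/generations",
        json={"prompt": "uplifting acoustic folk", "duration": 10.0},
        timeout=600,  # music generation is slow on modest GPUs
    )
    resp.raise_for_status()
    payload = resp.json()
    with open("out.wav", "wb") as f:
        f.write(base64.b64decode(payload["audio"]))  # base64-encoded WAV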