Files
runpod/models/flux/server.py
Sebastian Krüger 9ee626a78e feat: implement Ansible-based process architecture for RunPod
Major architecture overhaul to address RunPod Docker limitations:

Core Infrastructure:
- Add base_service.py: Abstract base class for all AI services
- Add service_manager.py: Process lifecycle management
- Add core/requirements.txt: Core dependencies

Model Services (Standalone Python):
- Add models/vllm/server.py: Qwen 2.5 7B text generation
- Add models/flux/server.py: Flux.1 Schnell image generation
- Add models/musicgen/server.py: MusicGen Medium music generation
- Each service inherits from GPUService base class
- OpenAI-compatible APIs
- Standalone execution support

Ansible Deployment:
- Add playbook.yml: Comprehensive deployment automation
- Add ansible.cfg: Ansible configuration
- Add inventory.yml: Localhost inventory
- Tags: base, python, dependencies, models, tailscale, validate, cleanup

Scripts:
- Add scripts/install.sh: Full installation wrapper
- Add scripts/download-models.sh: Model download wrapper
- Add scripts/start-all.sh: Start orchestrator
- Add scripts/stop-all.sh: Stop all services

Documentation:
- Update ARCHITECTURE.md: Document distributed VPS+GPU architecture

Benefits:
- No Docker: Avoids RunPod CAP_SYS_ADMIN limitations
- Fully reproducible via Ansible
- Extensible: Add models in 3 steps
- Direct Python execution (no container overhead)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-21 15:37:18 +01:00

194 lines
6.8 KiB
Python

#!/usr/bin/env python3
"""
Flux.1 Image Generation Service
OpenAI-compatible image generation using Flux.1 Schnell model.
Provides /v1/images/generations endpoint.
"""
import base64
import io
import os
import sys
import time
from typing import Optional

import torch
from diffusers import FluxPipeline
from fastapi import HTTPException
from PIL import Image
from pydantic import BaseModel, Field

# Import base service class
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))
from core.base_service import GPUService
class ImageGenerationRequest(BaseModel):
    """Image generation request (OpenAI-compatible)"""

    # Name of the model the client is asking for.
    model: str = Field("flux-schnell", description="Model name")

    # The only required field: the text prompt to render.
    prompt: str = Field(default=..., description="Text description of the image to generate")

    # Number of images per request, constrained to the range 1..4.
    n: int = Field(1, ge=1, le=4, description="Number of images to generate")

    # Requested dimensions as a "<width>x<height>" string.
    size: str = Field("1024x1024", description="Image size (e.g., 512x512, 1024x1024)")

    # How the client wants the image payload delivered.
    response_format: str = Field("b64_json", description="Response format: url or b64_json")

    # OpenAI-style rendering hints.
    quality: str = Field("standard", description="Image quality: standard or hd")
    style: str = Field("natural", description="Image style: natural or vivid")
class ImageGenerationResponse(BaseModel):
    """Image generation response (OpenAI-compatible)"""

    # Both fields are required; there are no defaults.
    created: int = Field(default=..., description="Unix timestamp")
    data: list = Field(default=..., description="List of generated images")
class FluxService(GPUService):
    """Flux.1 Schnell image generation service"""

    # (width, height) used whenever a requested size string cannot be honoured.
    DEFAULT_SIZE = (1024, 1024)

    def __init__(self):
        """Configure the service from environment variables.

        Reads ``PORT`` (default ``8002``) for the listening port and
        ``MODEL_NAME`` (default ``black-forest-labs/FLUX.1-schnell``) for the
        checkpoint to load. The pipeline itself is loaded in ``initialize``.
        """
        port = int(os.getenv("PORT", "8002"))
        super().__init__(name="flux-schnell", port=port)

        # Service-specific attributes
        self.pipeline: Optional[FluxPipeline] = None
        self.model_name = os.getenv("MODEL_NAME", "black-forest-labs/FLUX.1-schnell")

    async def initialize(self):
        """Load the Flux.1 pipeline and place it on the best available device.

        The checkpoint is read through the Hugging Face cache directory given
        by ``HF_CACHE_DIR`` (default ``/workspace/huggingface_cache``).
        """
        await super().initialize()
        self.logger.info(f"Loading Flux.1 pipeline: {self.model_name}")

        self.pipeline = FluxPipeline.from_pretrained(
            self.model_name,
            torch_dtype=torch.bfloat16,
            cache_dir=os.getenv("HF_CACHE_DIR", "/workspace/huggingface_cache"),
        )

        if torch.cuda.is_available():
            if hasattr(self.pipeline, "enable_model_cpu_offload"):
                # Model CPU offload manages device placement itself, moving
                # components to the GPU only while they are needed. Moving the
                # whole pipeline to CUDA first would transfer every weight to
                # VRAM just to have it moved back (and can OOM on smaller
                # GPUs), so the two are mutually exclusive here.
                self.pipeline.enable_model_cpu_offload()
                self.logger.info("Flux.1 pipeline loaded on GPU with model CPU offload")
            else:
                self.pipeline = self.pipeline.to("cuda")
                self.logger.info("Flux.1 pipeline loaded on GPU")
        else:
            # Offloading targets an accelerator; without one there is nothing
            # to offload to, so the pipeline simply stays on the CPU.
            self.logger.warning("GPU not available, running on CPU (very slow)")

        self.logger.info("Flux.1 pipeline initialized successfully")

    async def cleanup(self):
        """Release the pipeline after the base class has cleaned up."""
        await super().cleanup()
        if self.pipeline is not None:
            self.logger.info("Flux.1 pipeline cleanup")
            self.pipeline = None
            # Dropping the reference alone leaves freed blocks in PyTorch's
            # caching allocator; hand them back to the driver.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    def parse_size(self, size_str: str) -> tuple[int, int]:
        """Parse a size string like '1024x1024' into (width, height).

        Parsing is best-effort: anything that is not exactly two positive
        integers separated by ``x`` (case-insensitive) yields ``DEFAULT_SIZE``
        instead of raising.
        """
        if not isinstance(size_str, str):
            return self.DEFAULT_SIZE

        parts = size_str.lower().split("x")
        if len(parts) != 2:
            return self.DEFAULT_SIZE

        try:
            width, height = int(parts[0]), int(parts[1])
        except ValueError:
            return self.DEFAULT_SIZE

        # Zero or negative dimensions can never produce an image.
        if width <= 0 or height <= 0:
            return self.DEFAULT_SIZE
        return (width, height)

    def image_to_base64(self, image: Image.Image) -> str:
        """Encode a PIL image as PNG and return it as a base64 text string."""
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    def create_app(self):
        """Create FastAPI routes"""

        @self.app.get("/")
        async def root():
            """Root endpoint"""
            return {
                "service": "Flux.1 Schnell Image Generation",
                "model": self.model_name,
                "max_images": 4,
            }

        @self.app.get("/v1/models")
        async def list_models():
            """List available models (OpenAI-compatible)"""
            return {
                "object": "list",
                "data": [
                    {
                        "id": "flux-schnell",
                        "object": "model",
                        "created": 1234567890,
                        "owned_by": "black-forest-labs",
                        "permission": [],
                        "root": self.model_name,
                        "parent": None,
                    }
                ],
            }

        @self.app.post("/v1/images/generations")
        async def generate_image(request: ImageGenerationRequest) -> ImageGenerationResponse:
            """Generate images from text prompt (OpenAI-compatible)"""
            # Responds 503 before initialize() has loaded the pipeline and
            # 500 (carrying the exception text) if generation fails.
            if self.pipeline is None:
                raise HTTPException(status_code=503, detail="Model not initialized")

            self.logger.info(f"Generating {request.n} image(s): {request.prompt[:100]}...")

            if request.response_format != "b64_json":
                # Serving files by URL is not implemented; every response
                # carries inline base64 data regardless of the requested format.
                self.logger.warning(
                    f"response_format '{request.response_format}' is not supported, "
                    "returning b64_json"
                )

            try:
                width, height = self.parse_size(request.size)
                self.logger.info(f"Size: {width}x{height}")

                images = []
                for i in range(request.n):
                    self.logger.info(f"Generating image {i+1}/{request.n}")
                    # NOTE(review): this call is synchronous inside an async
                    # handler, so other requests wait while an image renders.
                    image = self.pipeline(
                        prompt=request.prompt,
                        width=width,
                        height=height,
                        num_inference_steps=4,  # Schnell is optimized for 4 steps
                        guidance_scale=0.0,  # Schnell doesn't use guidance
                    ).images[0]
                    images.append({"b64_json": self.image_to_base64(image)})

                self.logger.info(f"Generated {request.n} image(s) successfully")
                # Report the actual creation time, not a fixed placeholder.
                return ImageGenerationResponse(created=int(time.time()), data=images)
            except Exception as e:
                self.logger.error(f"Error generating image: {e}", exc_info=True)
                raise HTTPException(status_code=500, detail=str(e)) from e
if __name__ == "__main__":
    # Standalone execution: construct the service and hand control to run().
    FluxService().run()