models/flux/server.py

#!/usr/bin/env python3
"""
Flux.1 Image Generation Service

OpenAI-compatible image generation using Flux.1 Schnell model.
Provides /v1/images/generations endpoint.
"""

import base64
import io
import os
from typing import Optional

import torch
from diffusers import FluxPipeline
from fastapi import HTTPException
from PIL import Image
from pydantic import BaseModel, Field

# Import base service class
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))
from core.base_service import GPUService


class ImageGenerationRequest(BaseModel):
    """Image generation request (OpenAI-compatible)"""
    model: str = Field(default="flux-schnell", description="Model name")
    prompt: str = Field(..., description="Text description of the image to generate")
    n: int = Field(default=1, ge=1, le=4, description="Number of images to generate")
    size: str = Field(default="1024x1024", description="Image size (e.g., 512x512, 1024x1024)")
    response_format: str = Field(default="b64_json", description="Response format: url or b64_json")
    quality: str = Field(default="standard", description="Image quality: standard or hd")
    style: str = Field(default="natural", description="Image style: natural or vivid")


class ImageGenerationResponse(BaseModel):
    """Image generation response (OpenAI-compatible)"""
    created: int = Field(..., description="Unix timestamp")
    data: list = Field(..., description="List of generated images")


class FluxService(GPUService):
    """Flux.1 Schnell image generation service"""

    def __init__(self):
        # Get port from environment or use default
        port = int(os.getenv("PORT", "8002"))
        super().__init__(name="flux-schnell", port=port)

        # Service-specific attributes
        self.pipeline: Optional[FluxPipeline] = None
        self.model_name = os.getenv("MODEL_NAME", "black-forest-labs/FLUX.1-schnell")

    async def initialize(self):
        """Initialize Flux.1 pipeline"""
        await super().initialize()

        self.logger.info(f"Loading Flux.1 pipeline: {self.model_name}")

        # Load pipeline
        self.pipeline = FluxPipeline.from_pretrained(
            self.model_name,
            torch_dtype=torch.bfloat16,
            cache_dir=os.getenv("HF_CACHE_DIR", "/workspace/huggingface_cache")
        )

        # Move to GPU
        if torch.cuda.is_available():
            self.pipeline = self.pipeline.to("cuda")
            self.logger.info("Flux.1 pipeline loaded on GPU")
        else:
            self.logger.warning("GPU not available, running on CPU (very slow)")

        # Enable memory optimizations
        if hasattr(self.pipeline, 'enable_model_cpu_offload'):
            # This moves models to GPU only when needed, saving VRAM
            self.pipeline.enable_model_cpu_offload()

        self.logger.info("Flux.1 pipeline initialized successfully")

    async def cleanup(self):
        """Cleanup resources"""
        await super().cleanup()
        if self.pipeline:
            self.logger.info("Flux.1 pipeline cleanup")
            self.pipeline = None

    def parse_size(self, size_str: str) -> tuple[int, int]:
        """Parse size string like '1024x1024' into (width, height)"""
        try:
            parts = size_str.lower().split('x')
            if len(parts) != 2:
                return (1024, 1024)
            width = int(parts[0])
            height = int(parts[1])
            return (width, height)
        except:
            return (1024, 1024)

    def image_to_base64(self, image: Image.Image) -> str:
        """Convert PIL Image to base64 string"""
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_bytes = buffered.getvalue()
        return base64.b64encode(img_bytes).decode('utf-8')

    def create_app(self):
        """Create FastAPI routes"""

        @self.app.get("/")
        async def root():
            """Root endpoint"""
            return {
                "service": "Flux.1 Schnell Image Generation",
                "model": self.model_name,
                "max_images": 4
            }

        @self.app.get("/v1/models")
        async def list_models():
            """List available models (OpenAI-compatible)"""
            return {
                "object": "list",
                "data": [
                    {
                        "id": "flux-schnell",
                        "object": "model",
                        "created": 1234567890,
                        "owned_by": "black-forest-labs",
                        "permission": [],
                        "root": self.model_name,
                        "parent": None,
                    }
                ]
            }

        @self.app.post("/v1/images/generations")
        async def generate_image(request: ImageGenerationRequest) -> ImageGenerationResponse:
            """Generate images from text prompt (OpenAI-compatible)"""
            if not self.pipeline:
                raise HTTPException(status_code=503, detail="Model not initialized")

            self.logger.info(f"Generating {request.n} image(s): {request.prompt[:100]}...")

            try:
                # Parse image size
                width, height = self.parse_size(request.size)
                self.logger.info(f"Size: {width}x{height}")

                # Generate images
                images = []
                for i in range(request.n):
                    self.logger.info(f"Generating image {i+1}/{request.n}")

                    # Flux.1 Schnell uses 4 inference steps for speed
                    image = self.pipeline(
                        prompt=request.prompt,
                        width=width,
                        height=height,
                        num_inference_steps=4,  # Schnell is optimized for 4 steps
                        guidance_scale=0.0,  # Schnell doesn't use guidance
                    ).images[0]

                    # Convert to base64
                    if request.response_format == "b64_json":
                        image_data = {
                            "b64_json": self.image_to_base64(image)
                        }
                    else:
                        # For URL format, we'd need to save and serve the file
                        # For now, we'll return base64 anyway
                        image_data = {
                            "b64_json": self.image_to_base64(image)
                        }

                    images.append(image_data)

                self.logger.info(f"Generated {request.n} image(s) successfully")

                return ImageGenerationResponse(
                    created=1234567890,
                    data=images
                )

            except Exception as e:
                self.logger.error(f"Error generating image: {e}", exc_info=True)
                raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    service = FluxService()
    service.run()
feat: implement Ansible-based process architecture for RunPod Major architecture overhaul to address RunPod Docker limitations: Core Infrastructure: - Add base_service.py: Abstract base class for all AI services - Add service_manager.py: Process lifecycle management - Add core/requirements.txt: Core dependencies Model Services (Standalone Python): - Add models/vllm/server.py: Qwen 2.5 7B text generation - Add models/flux/server.py: Flux.1 Schnell image generation - Add models/musicgen/server.py: MusicGen Medium music generation - Each service inherits from GPUService base class - OpenAI-compatible APIs - Standalone execution support Ansible Deployment: - Add playbook.yml: Comprehensive deployment automation - Add ansible.cfg: Ansible configuration - Add inventory.yml: Localhost inventory - Tags: base, python, dependencies, models, tailscale, validate, cleanup Scripts: - Add scripts/install.sh: Full installation wrapper - Add scripts/download-models.sh: Model download wrapper - Add scripts/start-all.sh: Start orchestrator - Add scripts/stop-all.sh: Stop all services Documentation: - Update ARCHITECTURE.md: Document distributed VPS+GPU architecture Benefits: - No Docker: Avoids RunPod CAP_SYS_ADMIN limitations - Fully reproducible via Ansible - Extensible: Add models in 3 steps - Direct Python execution (no container overhead) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-21 15:37:18 +01:00			`#!/usr/bin/env python3`
			`"""`
			`Flux.1 Image Generation Service`

			`OpenAI-compatible image generation using Flux.1 Schnell model.`
			`Provides /v1/images/generations endpoint.`
			`"""`

			`import base64`
			`import io`
			`import os`
			`from typing import Optional`

			`import torch`
			`from diffusers import FluxPipeline`
			`from fastapi import HTTPException`
			`from PIL import Image`
			`from pydantic import BaseModel, Field`

			`# Import base service class`
			`import sys`
			`sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))`
			`from core.base_service import GPUService`


			`class ImageGenerationRequest(BaseModel):`
			`"""Image generation request (OpenAI-compatible)"""`
			`model: str = Field(default="flux-schnell", description="Model name")`
			`prompt: str = Field(..., description="Text description of the image to generate")`
			`n: int = Field(default=1, ge=1, le=4, description="Number of images to generate")`
			`size: str = Field(default="1024x1024", description="Image size (e.g., 512x512, 1024x1024)")`
			`response_format: str = Field(default="b64_json", description="Response format: url or b64_json")`
			`quality: str = Field(default="standard", description="Image quality: standard or hd")`
			`style: str = Field(default="natural", description="Image style: natural or vivid")`


			`class ImageGenerationResponse(BaseModel):`
			`"""Image generation response (OpenAI-compatible)"""`
			`created: int = Field(..., description="Unix timestamp")`
			`data: list = Field(..., description="List of generated images")`


			`class FluxService(GPUService):`
			`"""Flux.1 Schnell image generation service"""`

			`def __init__(self):`
			`# Get port from environment or use default`
			`port = int(os.getenv("PORT", "8002"))`
			`super().__init__(name="flux-schnell", port=port)`

			`# Service-specific attributes`
			`self.pipeline: Optional[FluxPipeline] = None`
			`self.model_name = os.getenv("MODEL_NAME", "black-forest-labs/FLUX.1-schnell")`

			`async def initialize(self):`
			`"""Initialize Flux.1 pipeline"""`
			`await super().initialize()`

			`self.logger.info(f"Loading Flux.1 pipeline: {self.model_name}")`

			`# Load pipeline`
			`self.pipeline = FluxPipeline.from_pretrained(`
			`self.model_name,`
			`torch_dtype=torch.bfloat16,`
			`cache_dir=os.getenv("HF_CACHE_DIR", "/workspace/huggingface_cache")`
			`)`

			`# Move to GPU`
			`if torch.cuda.is_available():`
			`self.pipeline = self.pipeline.to("cuda")`
			`self.logger.info("Flux.1 pipeline loaded on GPU")`
			`else:`
			`self.logger.warning("GPU not available, running on CPU (very slow)")`

			`# Enable memory optimizations`
			`if hasattr(self.pipeline, 'enable_model_cpu_offload'):`
			`# This moves models to GPU only when needed, saving VRAM`
			`self.pipeline.enable_model_cpu_offload()`

			`self.logger.info("Flux.1 pipeline initialized successfully")`

			`async def cleanup(self):`
			`"""Cleanup resources"""`
			`await super().cleanup()`
			`if self.pipeline:`
			`self.logger.info("Flux.1 pipeline cleanup")`
			`self.pipeline = None`

			`def parse_size(self, size_str: str) -> tuple[int, int]:`
			`"""Parse size string like '1024x1024' into (width, height)"""`
			`try:`
			`parts = size_str.lower().split('x')`
			`if len(parts) != 2:`
			`return (1024, 1024)`
			`width = int(parts[0])`
			`height = int(parts[1])`
			`return (width, height)`
			`except:`
			`return (1024, 1024)`

			`def image_to_base64(self, image: Image.Image) -> str:`
			`"""Convert PIL Image to base64 string"""`
			`buffered = io.BytesIO()`
			`image.save(buffered, format="PNG")`
			`img_bytes = buffered.getvalue()`
			`return base64.b64encode(img_bytes).decode('utf-8')`

			`def create_app(self):`
			`"""Create FastAPI routes"""`

			`@self.app.get("/")`
			`async def root():`
			`"""Root endpoint"""`
			`return {`
			`"service": "Flux.1 Schnell Image Generation",`
			`"model": self.model_name,`
			`"max_images": 4`
			`}`

			`@self.app.get("/v1/models")`
			`async def list_models():`
			`"""List available models (OpenAI-compatible)"""`
			`return {`
			`"object": "list",`
			`"data": [`
			`{`
			`"id": "flux-schnell",`
			`"object": "model",`
			`"created": 1234567890,`
			`"owned_by": "black-forest-labs",`
			`"permission": [],`
			`"root": self.model_name,`
			`"parent": None,`
			`}`
			`]`
			`}`

			`@self.app.post("/v1/images/generations")`
			`async def generate_image(request: ImageGenerationRequest) -> ImageGenerationResponse:`
			`"""Generate images from text prompt (OpenAI-compatible)"""`
			`if not self.pipeline:`
			`raise HTTPException(status_code=503, detail="Model not initialized")`

			`self.logger.info(f"Generating {request.n} image(s): {request.prompt[:100]}...")`

			`try:`
			`# Parse image size`
			`width, height = self.parse_size(request.size)`
			`self.logger.info(f"Size: {width}x{height}")`

			`# Generate images`
			`images = []`
			`for i in range(request.n):`
			`self.logger.info(f"Generating image {i+1}/{request.n}")`

			`# Flux.1 Schnell uses 4 inference steps for speed`
			`image = self.pipeline(`
			`prompt=request.prompt,`
			`width=width,`
			`height=height,`
			`num_inference_steps=4, # Schnell is optimized for 4 steps`
			`guidance_scale=0.0, # Schnell doesn't use guidance`
			`).images[0]`

			`# Convert to base64`
			`if request.response_format == "b64_json":`
			`image_data = {`
			`"b64_json": self.image_to_base64(image)`
			`}`
			`else:`
			`# For URL format, we'd need to save and serve the file`
			`# For now, we'll return base64 anyway`
			`image_data = {`
			`"b64_json": self.image_to_base64(image)`
			`}`

			`images.append(image_data)`

			`self.logger.info(f"Generated {request.n} image(s) successfully")`

			`return ImageGenerationResponse(`
			`created=1234567890,`
			`data=images`
			`)`

			`except Exception as e:`
			`self.logger.error(f"Error generating image: {e}", exc_info=True)`
			`raise HTTPException(status_code=500, detail=str(e))`


			`if __name__ == "__main__":`
			`service = FluxService()`
			`service.run()`