Initial implementation of AudioCraft Studio

Complete web interface for Meta's AudioCraft AI audio generation:

- Gradio UI with tabs for all 5 model families (MusicGen, AudioGen,
  MAGNeT, MusicGen Style, JASCO)
- REST API with FastAPI, OpenAPI docs, and API key auth
- VRAM management with ComfyUI coexistence support
- SQLite database for project/generation history
- Batch processing queue for async generation
- Docker deployment optimized for RunPod with RTX 4090

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-25 19:34:27 +01:00
commit ffbf02b12c
67 changed files with 12032 additions and 0 deletions

5
config/__init__.py Normal file
View File

@@ -0,0 +1,5 @@
"""Configuration module for AudioCraft Studio."""
# Re-export the settings API from config.settings so it can be imported
# directly from the `config` package.
from config.settings import Settings, get_settings
# Names exported by `from config import *`.
__all__ = ["Settings", "get_settings"]

151
config/models.yaml Normal file
View File

@@ -0,0 +1,151 @@
# AudioCraft Model Registry Configuration
# This file defines all available models and their configurations
#
# Fields used by the variants of this family:
#   hf_id        - model identifier (looks like a Hugging Face repo id; confirm against the loader)
#   vram_mb      - presumably the estimated VRAM footprint in MB; TODO confirm against the VRAM manager
#   max_duration - presumably the longest clip length in seconds; TODO confirm
#   channels     - set to 2 on the stereo-* variants only; other variants omit it
#   conditioning - extra conditioning inputs the variant accepts (only `melody` in this family)
#   description  - human-readable summary of the variant
models:
# MusicGen family: eight variants (four mono, four stereo).
musicgen:
enabled: true
display_name: "MusicGen"
description: "Text-to-music generation with optional melody conditioning"
# Must name one of the keys listed under `variants` below.
default_variant: medium
variants:
small:
hf_id: facebook/musicgen-small
vram_mb: 1500
max_duration: 30
description: "Fast, lightweight model (300M params)"
medium:
hf_id: facebook/musicgen-medium
vram_mb: 5000
max_duration: 30
description: "Balanced quality and speed (1.5B params)"
large:
hf_id: facebook/musicgen-large
vram_mb: 10000
max_duration: 30
description: "Highest quality, slower (3.3B params)"
# Melody variant: the only mono variant that declares a conditioning input.
melody:
hf_id: facebook/musicgen-melody
vram_mb: 5000
max_duration: 30
conditioning:
- melody
description: "Melody-conditioned generation (1.5B params)"
# Stereo variants: each declares `channels: 2` and a higher vram_mb than
# its mono counterpart above.
stereo-small:
hf_id: facebook/musicgen-stereo-small
vram_mb: 1800
max_duration: 30
channels: 2
description: "Stereo output, fast (300M params)"
stereo-medium:
hf_id: facebook/musicgen-stereo-medium
vram_mb: 6000
max_duration: 30
channels: 2
description: "Stereo output, balanced (1.5B params)"
stereo-large:
hf_id: facebook/musicgen-stereo-large
vram_mb: 12000
max_duration: 30
channels: 2
description: "Stereo output, highest quality (3.3B params)"
stereo-melody:
hf_id: facebook/musicgen-stereo-melody
vram_mb: 6000
max_duration: 30
channels: 2
conditioning:
- melody
description: "Stereo melody-conditioned (1.5B params)"
# AudioGen family: a single variant, which is also the default_variant.
audiogen:
enabled: true
display_name: "AudioGen"
description: "Text-to-sound effects generation"
default_variant: medium
variants:
medium:
hf_id: facebook/audiogen-medium
# presumably the estimated VRAM footprint in MB; TODO confirm
vram_mb: 5000
# presumably seconds; TODO confirm
max_duration: 10
description: "Sound effects generator (1.5B params)"
# MAGNeT family: variant names combine a model size with a clip length, and
# each variant's max_duration equals the number in its "-10secs"/"-30secs" suffix.
magnet:
enabled: true
display_name: "MAGNeT"
description: "Fast non-autoregressive music generation"
# Must name one of the keys listed under `variants` below.
default_variant: medium-10secs
variants:
small-10secs:
hf_id: facebook/magnet-small-10secs
vram_mb: 1500
max_duration: 10
description: "Fast 10-second clips (300M params)"
medium-10secs:
hf_id: facebook/magnet-medium-10secs
vram_mb: 5000
max_duration: 10
description: "Quality 10-second clips (1.5B params)"
small-30secs:
hf_id: facebook/magnet-small-30secs
vram_mb: 1800
max_duration: 30
description: "Fast 30-second clips (300M params)"
medium-30secs:
hf_id: facebook/magnet-medium-30secs
vram_mb: 6000
max_duration: 30
description: "Quality 30-second clips (1.5B params)"
# MusicGen Style family: a single variant that declares `style` conditioning.
musicgen-style:
enabled: true
display_name: "MusicGen Style"
description: "Style-conditioned music generation from reference audio"
default_variant: medium
variants:
medium:
hf_id: facebook/musicgen-style
# presumably the estimated VRAM footprint in MB; TODO confirm
vram_mb: 5000
# presumably seconds; TODO confirm
max_duration: 30
conditioning:
- style
description: "Style transfer from reference audio (1.5B params)"
# JASCO family: two variants, both declaring `chords` and `drums` conditioning.
jasco:
enabled: true
display_name: "JASCO"
description: "Chord and drum-conditioned music generation"
# Must name one of the keys listed under `variants` below.
default_variant: chords-drums-400M
variants:
chords-drums-400M:
hf_id: facebook/jasco-chords-drums-400M
vram_mb: 2000
# presumably seconds; TODO confirm
max_duration: 10
conditioning:
- chords
- drums
description: "Chord/drum control, fast (400M params)"
chords-drums-1B:
hf_id: facebook/jasco-chords-drums-1B
vram_mb: 4000
max_duration: 10
conditioning:
- chords
- drums
description: "Chord/drum control, higher quality (1B params)"
# Default generation parameters
defaults:
generation:
# presumably seconds; TODO confirm against the generation code
duration: 10
temperature: 1.0
top_k: 250
# NOTE(review): 0.0 presumably disables nucleus (top-p) sampling so that
# top_k applies; confirm against AudioCraft's generation parameters.
top_p: 0.0
# presumably the classifier-free guidance coefficient; TODO confirm
cfg_coef: 3.0
# VRAM thresholds for warnings
# Both thresholds are fractions of utilization (0.85 = 85%, 0.95 = 95%).
vram:
warning_threshold: 0.85 # 85% utilization warning
critical_threshold: 0.95 # 95% utilization critical
# Presets are loaded from data/presets/*.yaml
presets_dir: "./data/presets"

94
config/settings.py Normal file
View File

@@ -0,0 +1,94 @@
"""Application settings with environment variable support."""
from functools import lru_cache
from pathlib import Path
from typing import Optional
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration with environment variable support.

    All settings can be overridden via environment variables prefixed with AUDIOCRAFT_.
    Example: AUDIOCRAFT_API_PORT=8080

    Values are also read from a UTF-8 ``.env`` file, and keys that do not
    match a declared field are ignored (see ``model_config``).
    """

    model_config = SettingsConfigDict(
        env_prefix="AUDIOCRAFT_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Server Configuration
    # The default host "0.0.0.0" binds on all network interfaces.
    host: str = Field(default="0.0.0.0", description="Server bind host")
    gradio_port: int = Field(default=7860, description="Gradio UI port")
    api_port: int = Field(default=8000, description="FastAPI port")

    # Paths
    data_dir: Path = Field(default=Path("./data"), description="Data directory")
    output_dir: Path = Field(default=Path("./outputs"), description="Generated audio output")
    cache_dir: Path = Field(default=Path("./cache"), description="Model cache directory")
    models_config: Path = Field(
        default=Path("./config/models.yaml"), description="Model registry config"
    )

    # VRAM Management
    comfyui_reserve_gb: float = Field(
        default=10.0, description="VRAM reserved for ComfyUI (GB)"
    )
    safety_buffer_gb: float = Field(
        default=1.0, description="Safety buffer to prevent OOM (GB)"
    )
    idle_unload_minutes: int = Field(
        default=15, description="Unload models after idle time (minutes)"
    )
    max_cached_models: int = Field(
        default=2, description="Maximum number of models to keep loaded"
    )

    # API Authentication
    # NOTE(review): api_key defaults to None and cors_origins to ["*"]; how
    # requests are treated when no key is configured is decided by the API
    # layer, which is not visible here -- confirm before exposing publicly.
    api_key: Optional[str] = Field(default=None, description="API key for authentication")
    cors_origins: list[str] = Field(
        default=["*"], description="Allowed CORS origins"
    )

    # Generation Defaults
    default_duration: float = Field(default=10.0, description="Default generation duration")
    max_duration: float = Field(default=300.0, description="Maximum generation duration")
    default_batch_size: int = Field(default=1, description="Default batch size")
    max_batch_size: int = Field(default=8, description="Maximum batch size")
    max_queue_size: int = Field(default=100, description="Maximum generation queue size")

    # Database
    database_url: str = Field(
        default="sqlite+aiosqlite:///./data/audiocraft.db",
        description="Database connection URL",
    )

    # Logging
    log_level: str = Field(default="INFO", description="Logging level")

    def ensure_directories(self) -> None:
        """Create required directories if they don't exist.

        Creates ``data_dir``, ``output_dir``, ``cache_dir`` and the
        ``presets`` sub-directory of ``data_dir``, including any missing
        parents. Directories that already exist are left untouched.
        """
        required = (
            self.data_dir,
            self.output_dir,
            self.cache_dir,
            self.data_dir / "presets",
        )
        for directory in required:
            directory.mkdir(parents=True, exist_ok=True)

    @property
    def database_path(self) -> Path:
        """Extract database file path from URL.

        Handles both ``sqlite:///`` and ``sqlite+aiosqlite:///`` URLs. The
        path is everything after the *first* ``///``, with any trailing
        ``?query`` section removed, so ``sqlite:////abs/db.sqlite`` yields
        ``/abs/db.sqlite`` and ``sqlite:///./db.sqlite?mode=ro`` yields
        ``db.sqlite``.

        Returns:
            The database file location as a ``Path``.

        Raises:
            ValueError: If ``database_url`` is not a SQLite URL, or if it
                carries no ``///`` path separator (e.g. the in-memory form
                ``sqlite://``), in which case there is no file to point at.
        """
        url = self.database_url
        if not url.startswith("sqlite"):
            raise ValueError("Only SQLite databases are supported")

        # Split on the first "///" only: a later "///" inside the file path
        # must stay part of the path instead of truncating it.
        _, separator, location = url.partition("///")
        if not separator:
            # Without the separator the whole URL would otherwise be
            # returned as if it were a file path.
            raise ValueError(f"SQLite URL has no file path: {url!r}")

        # Query parameters belong to the connection, not the file name.
        file_part, _, _ = location.partition("?")
        return Path(file_part)
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance.

    The first call builds ``Settings()``; because the function takes no
    arguments, ``lru_cache`` hands back that same object on every later
    call.
    """
    settings = Settings()
    return settings