Initial implementation of AudioCraft Studio
Complete web interface for Meta's AudioCraft AI audio generation:

- Gradio UI with tabs for all 5 model families (MusicGen, AudioGen, MAGNeT, MusicGen Style, JASCO)
- REST API with FastAPI, OpenAPI docs, and API key auth
- VRAM management with ComfyUI coexistence support
- SQLite database for project/generation history
- Batch processing queue for async generation
- Docker deployment optimized for RunPod with RTX 4090

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
5
config/__init__.py
Normal file
5
config/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Configuration module for AudioCraft Studio."""
|
||||
|
||||
from config.settings import Settings, get_settings
|
||||
|
||||
__all__ = ["Settings", "get_settings"]
|
||||
151
config/models.yaml
Normal file
151
config/models.yaml
Normal file
@@ -0,0 +1,151 @@
|
||||
# AudioCraft Model Registry Configuration
|
||||
# This file defines all available models and their configurations
|
||||
|
||||
models:
|
||||
musicgen:
|
||||
enabled: true
|
||||
display_name: "MusicGen"
|
||||
description: "Text-to-music generation with optional melody conditioning"
|
||||
default_variant: medium
|
||||
variants:
|
||||
small:
|
||||
hf_id: facebook/musicgen-small
|
||||
vram_mb: 1500
|
||||
max_duration: 30
|
||||
description: "Fast, lightweight model (300M params)"
|
||||
medium:
|
||||
hf_id: facebook/musicgen-medium
|
||||
vram_mb: 5000
|
||||
max_duration: 30
|
||||
description: "Balanced quality and speed (1.5B params)"
|
||||
large:
|
||||
hf_id: facebook/musicgen-large
|
||||
vram_mb: 10000
|
||||
max_duration: 30
|
||||
description: "Highest quality, slower (3.3B params)"
|
||||
melody:
|
||||
hf_id: facebook/musicgen-melody
|
||||
vram_mb: 5000
|
||||
max_duration: 30
|
||||
conditioning:
|
||||
- melody
|
||||
description: "Melody-conditioned generation (1.5B params)"
|
||||
stereo-small:
|
||||
hf_id: facebook/musicgen-stereo-small
|
||||
vram_mb: 1800
|
||||
max_duration: 30
|
||||
channels: 2
|
||||
description: "Stereo output, fast (300M params)"
|
||||
stereo-medium:
|
||||
hf_id: facebook/musicgen-stereo-medium
|
||||
vram_mb: 6000
|
||||
max_duration: 30
|
||||
channels: 2
|
||||
description: "Stereo output, balanced (1.5B params)"
|
||||
stereo-large:
|
||||
hf_id: facebook/musicgen-stereo-large
|
||||
vram_mb: 12000
|
||||
max_duration: 30
|
||||
channels: 2
|
||||
description: "Stereo output, highest quality (3.3B params)"
|
||||
stereo-melody:
|
||||
hf_id: facebook/musicgen-stereo-melody
|
||||
vram_mb: 6000
|
||||
max_duration: 30
|
||||
channels: 2
|
||||
conditioning:
|
||||
- melody
|
||||
description: "Stereo melody-conditioned (1.5B params)"
|
||||
|
||||
audiogen:
|
||||
enabled: true
|
||||
display_name: "AudioGen"
|
||||
description: "Text-to-sound effects generation"
|
||||
default_variant: medium
|
||||
variants:
|
||||
medium:
|
||||
hf_id: facebook/audiogen-medium
|
||||
vram_mb: 5000
|
||||
max_duration: 10
|
||||
description: "Sound effects generator (1.5B params)"
|
||||
|
||||
magnet:
|
||||
enabled: true
|
||||
display_name: "MAGNeT"
|
||||
description: "Fast non-autoregressive music generation"
|
||||
default_variant: medium-10secs
|
||||
variants:
|
||||
small-10secs:
|
||||
hf_id: facebook/magnet-small-10secs
|
||||
vram_mb: 1500
|
||||
max_duration: 10
|
||||
description: "Fast 10-second clips (300M params)"
|
||||
medium-10secs:
|
||||
hf_id: facebook/magnet-medium-10secs
|
||||
vram_mb: 5000
|
||||
max_duration: 10
|
||||
description: "Quality 10-second clips (1.5B params)"
|
||||
small-30secs:
|
||||
hf_id: facebook/magnet-small-30secs
|
||||
vram_mb: 1800
|
||||
max_duration: 30
|
||||
description: "Fast 30-second clips (300M params)"
|
||||
medium-30secs:
|
||||
hf_id: facebook/magnet-medium-30secs
|
||||
vram_mb: 6000
|
||||
max_duration: 30
|
||||
description: "Quality 30-second clips (1.5B params)"
|
||||
|
||||
musicgen-style:
|
||||
enabled: true
|
||||
display_name: "MusicGen Style"
|
||||
description: "Style-conditioned music generation from reference audio"
|
||||
default_variant: medium
|
||||
variants:
|
||||
medium:
|
||||
hf_id: facebook/musicgen-style
|
||||
vram_mb: 5000
|
||||
max_duration: 30
|
||||
conditioning:
|
||||
- style
|
||||
description: "Style transfer from reference audio (1.5B params)"
|
||||
|
||||
jasco:
|
||||
enabled: true
|
||||
display_name: "JASCO"
|
||||
description: "Chord and drum-conditioned music generation"
|
||||
default_variant: chords-drums-400M
|
||||
variants:
|
||||
chords-drums-400M:
|
||||
hf_id: facebook/jasco-chords-drums-400M
|
||||
vram_mb: 2000
|
||||
max_duration: 10
|
||||
conditioning:
|
||||
- chords
|
||||
- drums
|
||||
description: "Chord/drum control, fast (400M params)"
|
||||
chords-drums-1B:
|
||||
hf_id: facebook/jasco-chords-drums-1B
|
||||
vram_mb: 4000
|
||||
max_duration: 10
|
||||
conditioning:
|
||||
- chords
|
||||
- drums
|
||||
description: "Chord/drum control, higher quality (1B params)"
|
||||
|
||||
# Default generation parameters
|
||||
defaults:
|
||||
generation:
|
||||
duration: 10
|
||||
temperature: 1.0
|
||||
top_k: 250
|
||||
top_p: 0.0
|
||||
cfg_coef: 3.0
|
||||
|
||||
# VRAM thresholds for warnings
|
||||
vram:
|
||||
warning_threshold: 0.85 # 85% utilization warning
|
||||
critical_threshold: 0.95 # 95% utilization critical
|
||||
|
||||
# Presets are loaded from data/presets/*.yaml
|
||||
presets_dir: "./data/presets"
|
||||
94
config/settings.py
Normal file
94
config/settings.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Application settings with environment variable support."""
|
||||
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration with environment variable support.

    All settings can be overridden via environment variables prefixed with
    AUDIOCRAFT_, or via a UTF-8 encoded ``.env`` file in the working
    directory. Keys that do not match a declared field are ignored.

    Example: AUDIOCRAFT_API_PORT=8080
    """

    model_config = SettingsConfigDict(
        env_prefix="AUDIOCRAFT_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Server Configuration
    host: str = Field(default="0.0.0.0", description="Server bind host")
    gradio_port: int = Field(default=7860, description="Gradio UI port")
    api_port: int = Field(default=8000, description="FastAPI port")

    # Paths
    data_dir: Path = Field(default=Path("./data"), description="Data directory")
    output_dir: Path = Field(default=Path("./outputs"), description="Generated audio output")
    cache_dir: Path = Field(default=Path("./cache"), description="Model cache directory")
    models_config: Path = Field(
        default=Path("./config/models.yaml"), description="Model registry config"
    )

    # VRAM Management
    comfyui_reserve_gb: float = Field(
        default=10.0, description="VRAM reserved for ComfyUI (GB)"
    )
    safety_buffer_gb: float = Field(
        default=1.0, description="Safety buffer to prevent OOM (GB)"
    )
    idle_unload_minutes: int = Field(
        default=15, description="Unload models after idle time (minutes)"
    )
    max_cached_models: int = Field(
        default=2, description="Maximum number of models to keep loaded"
    )

    # API Authentication
    api_key: Optional[str] = Field(default=None, description="API key for authentication")
    cors_origins: list[str] = Field(
        default=["*"], description="Allowed CORS origins"
    )

    # Generation Defaults
    default_duration: float = Field(default=10.0, description="Default generation duration")
    max_duration: float = Field(default=300.0, description="Maximum generation duration")
    default_batch_size: int = Field(default=1, description="Default batch size")
    max_batch_size: int = Field(default=8, description="Maximum batch size")
    max_queue_size: int = Field(default=100, description="Maximum generation queue size")

    # Database
    database_url: str = Field(
        default="sqlite+aiosqlite:///./data/audiocraft.db",
        description="Database connection URL",
    )

    # Logging
    log_level: str = Field(default="INFO", description="Logging level")

    def ensure_directories(self) -> None:
        """Create required directories if they don't exist.

        Creates ``data_dir``, ``output_dir``, ``cache_dir`` and the
        ``presets`` sub-directory of ``data_dir``, including any missing
        parents. Directories that already exist are left untouched.
        """
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        (self.data_dir / "presets").mkdir(parents=True, exist_ok=True)

    @property
    def database_path(self) -> Path:
        """Extract database file path from URL.

        Handles both ``sqlite:///`` and ``sqlite+aiosqlite:///`` URLs.
        Everything after the first ``///`` is taken as the path, and any
        trailing ``?key=value`` connection options are dropped.

        Returns:
            The database file path named by ``database_url``.

        Raises:
            ValueError: If ``database_url`` is not a SQLite URL, or if it
                contains no file path component (for example ``sqlite://``).
        """
        url = self.database_url
        if not url.startswith("sqlite"):
            raise ValueError("Only SQLite databases are supported")

        # Split on the FIRST "///" only: a path that itself contains "///"
        # (e.g. "sqlite:////srv///db.sqlite") must not be truncated to its
        # last segment.
        _, separator, remainder = url.partition("///")

        # Connection options after "?" are not part of the filename.
        file_part = remainder.partition("?")[0]

        if not separator or not file_part:
            # Without this guard the scheme itself would be returned as a path.
            raise ValueError(
                f"SQLite URL does not contain a database file path: {url!r}"
            )
        return Path(file_part)
|
||||
|
||||
|
||||
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance, building it on first call.

    The function takes no arguments, so ``lru_cache`` stores exactly one
    result: every later call returns the same object without constructing
    ``Settings`` again.
    """
    settings = Settings()
    return settings
|
||||
Reference in New Issue
Block a user