Complete web interface for Meta's AudioCraft AI audio generation:

- Gradio UI with tabs for all 5 model families (MusicGen, AudioGen, MAGNeT, MusicGen Style, JASCO)
- REST API with FastAPI, OpenAPI docs, and API key auth
- VRAM management with ComfyUI coexistence support
- SQLite database for project/generation history
- Batch processing queue for async generation
- Docker deployment optimized for RunPod with RTX 4090

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
152 lines
4.3 KiB
YAML
152 lines
4.3 KiB
YAML
# AudioCraft Model Registry Configuration
# This file defines all available models and their configurations

# Registry of model families, keyed by family id.
#
# Every family carries:
#   enabled, display_name, description, default_variant, variants
# `default_variant` always names one of the keys under that family's
# `variants` mapping.
#
# Every variant carries:
#   hf_id, vram_mb, max_duration, description
# plus, where present:
#   channels      - set to 2 on the stereo-* variants only
#   conditioning  - list of extra conditioning inputs the variant accepts
#
# NOTE(review): hf_id values look like Hugging Face repo ids, vram_mb like
# a VRAM estimate in megabytes, and max_duration like seconds - confirm
# against the code that loads this file.
models:
  musicgen:
    enabled: true
    display_name: "MusicGen"
    description: "Text-to-music generation with optional melody conditioning"
    default_variant: medium
    variants:
      small:
        hf_id: facebook/musicgen-small
        vram_mb: 1500
        max_duration: 30
        description: "Fast, lightweight model (300M params)"
      medium:
        hf_id: facebook/musicgen-medium
        vram_mb: 5000
        max_duration: 30
        description: "Balanced quality and speed (1.5B params)"
      large:
        hf_id: facebook/musicgen-large
        vram_mb: 10000
        max_duration: 30
        description: "Highest quality, slower (3.3B params)"
      melody:
        hf_id: facebook/musicgen-melody
        vram_mb: 5000
        max_duration: 30
        conditioning:
          - melody
        description: "Melody-conditioned generation (1.5B params)"
      stereo-small:
        hf_id: facebook/musicgen-stereo-small
        vram_mb: 1800
        max_duration: 30
        channels: 2
        description: "Stereo output, fast (300M params)"
      stereo-medium:
        hf_id: facebook/musicgen-stereo-medium
        vram_mb: 6000
        max_duration: 30
        channels: 2
        description: "Stereo output, balanced (1.5B params)"
      stereo-large:
        hf_id: facebook/musicgen-stereo-large
        vram_mb: 12000
        max_duration: 30
        channels: 2
        description: "Stereo output, highest quality (3.3B params)"
      stereo-melody:
        hf_id: facebook/musicgen-stereo-melody
        vram_mb: 6000
        max_duration: 30
        channels: 2
        conditioning:
          - melody
        description: "Stereo melody-conditioned (1.5B params)"

  audiogen:
    enabled: true
    display_name: "AudioGen"
    description: "Text-to-sound effects generation"
    default_variant: medium
    variants:
      medium:
        hf_id: facebook/audiogen-medium
        vram_mb: 5000
        max_duration: 10
        description: "Sound effects generator (1.5B params)"

  magnet:
    enabled: true
    display_name: "MAGNeT"
    description: "Fast non-autoregressive music generation"
    default_variant: medium-10secs
    variants:
      small-10secs:
        hf_id: facebook/magnet-small-10secs
        vram_mb: 1500
        max_duration: 10
        description: "Fast 10-second clips (300M params)"
      medium-10secs:
        hf_id: facebook/magnet-medium-10secs
        vram_mb: 5000
        max_duration: 10
        description: "Quality 10-second clips (1.5B params)"
      small-30secs:
        hf_id: facebook/magnet-small-30secs
        vram_mb: 1800
        max_duration: 30
        description: "Fast 30-second clips (300M params)"
      medium-30secs:
        hf_id: facebook/magnet-medium-30secs
        vram_mb: 6000
        max_duration: 30
        description: "Quality 30-second clips (1.5B params)"

  musicgen-style:
    enabled: true
    display_name: "MusicGen Style"
    description: "Style-conditioned music generation from reference audio"
    default_variant: medium
    variants:
      medium:
        hf_id: facebook/musicgen-style
        vram_mb: 5000
        max_duration: 30
        conditioning:
          - style
        description: "Style transfer from reference audio (1.5B params)"

  jasco:
    enabled: true
    display_name: "JASCO"
    description: "Chord and drum-conditioned music generation"
    default_variant: chords-drums-400M
    variants:
      chords-drums-400M:
        hf_id: facebook/jasco-chords-drums-400M
        vram_mb: 2000
        max_duration: 10
        conditioning:
          - chords
          - drums
        description: "Chord/drum control, fast (400M params)"
      chords-drums-1B:
        hf_id: facebook/jasco-chords-drums-1B
        vram_mb: 4000
        max_duration: 10
        conditioning:
          - chords
          - drums
        description: "Chord/drum control, higher quality (1B params)"

# Default generation parameters
# Values under `generation` are the fallbacks used when a request does not
# supply its own.
# NOTE(review): `duration` is presumably in seconds (10 fits every variant's
# max_duration in the registry) - confirm.
# NOTE(review): in AudioCraft's generation params a top_p of 0 means top_k
# sampling is used instead - confirm the consumer passes it through as-is.
defaults:
  generation:
    duration: 10
    temperature: 1.0
    top_k: 250
    top_p: 0.0
    cfg_coef: 3.0

# VRAM thresholds for warnings
# Both values are fractions of total VRAM in use (0.0 - 1.0).
# NOTE(review): indentation was lost in the copy this was restored from;
# `vram` is kept as a top-level key - confirm the loader does not expect
# it nested under `defaults`.
vram:
  warning_threshold: 0.85  # 85% utilization warning
  critical_threshold: 0.95  # 95% utilization critical

# Presets are loaded from data/presets/*.yaml
# NOTE(review): the path is relative; presumably resolved against the
# application's working directory - confirm.
presets_dir: "./data/presets"