Initial implementation of AudioCraft Studio
Complete web interface for Meta's AudioCraft AI audio generation:

- Gradio UI with tabs for all 5 model families (MusicGen, AudioGen, MAGNeT, MusicGen Style, JASCO)
- REST API with FastAPI, OpenAPI docs, and API key auth
- VRAM management with ComfyUI coexistence support
- SQLite database for project/generation history
- Batch processing queue for async generation
- Docker deployment optimized for RunPod with RTX 4090

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
151
config/models.yaml
Normal file
151
config/models.yaml
Normal file
@@ -0,0 +1,151 @@
|
||||
# AudioCraft Model Registry Configuration
|
||||
# This file defines all available models and their configurations
|
||||
|
||||
# Registry of model families, keyed by family id.
#
# Each family carries:
#   enabled          - boolean flag for the family
#   display_name     - human-readable name
#   description      - short summary of what the family generates
#   default_variant  - must name one of the keys under `variants`
#   variants         - mapping of variant id -> variant settings
#
# Each variant carries:
#   hf_id            - model identifier (presumably the Hugging Face repo id;
#                      confirm against the loader)
#   vram_mb          - presumably the estimated VRAM footprint in megabytes
#                      (TODO confirm how the VRAM manager uses it)
#   max_duration     - presumably the maximum clip length in seconds
#   channels         - optional; set to 2 on the stereo variants
#   conditioning     - optional list of conditioning inputs the variant takes
#   description      - short summary shown for the variant
models:
  musicgen:
    enabled: true
    display_name: "MusicGen"
    description: "Text-to-music generation with optional melody conditioning"
    default_variant: medium
    variants:
      small:
        hf_id: facebook/musicgen-small
        vram_mb: 1500
        max_duration: 30
        description: "Fast, lightweight model (300M params)"
      medium:
        hf_id: facebook/musicgen-medium
        vram_mb: 5000
        max_duration: 30
        description: "Balanced quality and speed (1.5B params)"
      large:
        hf_id: facebook/musicgen-large
        vram_mb: 10000
        max_duration: 30
        description: "Highest quality, slower (3.3B params)"
      melody:
        hf_id: facebook/musicgen-melody
        vram_mb: 5000
        max_duration: 30
        conditioning:
          - melody
        description: "Melody-conditioned generation (1.5B params)"
      stereo-small:
        hf_id: facebook/musicgen-stereo-small
        vram_mb: 1800
        max_duration: 30
        channels: 2
        description: "Stereo output, fast (300M params)"
      stereo-medium:
        hf_id: facebook/musicgen-stereo-medium
        vram_mb: 6000
        max_duration: 30
        channels: 2
        description: "Stereo output, balanced (1.5B params)"
      stereo-large:
        hf_id: facebook/musicgen-stereo-large
        vram_mb: 12000
        max_duration: 30
        channels: 2
        description: "Stereo output, highest quality (3.3B params)"
      stereo-melody:
        hf_id: facebook/musicgen-stereo-melody
        vram_mb: 6000
        max_duration: 30
        channels: 2
        conditioning:
          - melody
        description: "Stereo melody-conditioned (1.5B params)"

  audiogen:
    enabled: true
    display_name: "AudioGen"
    description: "Text-to-sound effects generation"
    default_variant: medium
    variants:
      medium:
        hf_id: facebook/audiogen-medium
        vram_mb: 5000
        max_duration: 10
        description: "Sound effects generator (1.5B params)"

  magnet:
    enabled: true
    display_name: "MAGNeT"
    description: "Fast non-autoregressive music generation"
    default_variant: medium-10secs
    variants:
      small-10secs:
        hf_id: facebook/magnet-small-10secs
        vram_mb: 1500
        max_duration: 10
        description: "Fast 10-second clips (300M params)"
      medium-10secs:
        hf_id: facebook/magnet-medium-10secs
        vram_mb: 5000
        max_duration: 10
        description: "Quality 10-second clips (1.5B params)"
      small-30secs:
        hf_id: facebook/magnet-small-30secs
        vram_mb: 1800
        max_duration: 30
        description: "Fast 30-second clips (300M params)"
      medium-30secs:
        hf_id: facebook/magnet-medium-30secs
        vram_mb: 6000
        max_duration: 30
        description: "Quality 30-second clips (1.5B params)"

  musicgen-style:
    enabled: true
    display_name: "MusicGen Style"
    description: "Style-conditioned music generation from reference audio"
    default_variant: medium
    variants:
      medium:
        hf_id: facebook/musicgen-style
        vram_mb: 5000
        max_duration: 30
        conditioning:
          - style
        description: "Style transfer from reference audio (1.5B params)"

  jasco:
    enabled: true
    display_name: "JASCO"
    description: "Chord and drum-conditioned music generation"
    default_variant: chords-drums-400M
    variants:
      chords-drums-400M:
        hf_id: facebook/jasco-chords-drums-400M
        vram_mb: 2000
        max_duration: 10
        conditioning:
          - chords
          - drums
        description: "Chord/drum control, fast (400M params)"
      chords-drums-1B:
        hf_id: facebook/jasco-chords-drums-1B
        vram_mb: 4000
        max_duration: 10
        conditioning:
          - chords
          - drums
        description: "Chord/drum control, higher quality (1B params)"
|
||||
|
||||
# Default generation parameters
#
# Values under `generation` are the fallback sampling settings.
# NOTE(review): `duration` is presumably in seconds, and a `top_p` of 0.0
# presumably leaves nucleus sampling off so that `top_k` applies - confirm
# against AudioCraft's `set_generation_params`.
defaults:
  generation:
    duration: 10
    temperature: 1.0
    top_k: 250
    top_p: 0.0
    cfg_coef: 3.0
|
||||
|
||||
# VRAM thresholds for warnings
#
# Both values are utilization fractions (0.85 means 85%).
vram:
  warning_threshold: 0.85  # 85% utilization warning
  critical_threshold: 0.95  # 95% utilization critical
|
||||
|
||||
# Directory holding the preset definitions; presets are read from the
# *.yaml files under data/presets.
presets_dir: './data/presets'
|
||||
Reference in New Issue
Block a user