---
# AudioCraft Model Registry Configuration
# This file defines all available models and their configurations.
#
# Layout of each entry under `models`:
#   enabled, display_name, description, default_variant, variants
# Each `default_variant` below names a key of that model's `variants` map.
# Every variant sets hf_id, vram_mb, max_duration and description; some
# variants additionally set `channels` and/or a `conditioning` list.
#
# NOTE(review): the block structure here was rebuilt from a copy whose
# newlines and indentation had been collapsed. `vram` and `presets_dir` are
# placed at the top level (each has its own section comment); confirm
# against the code that loads this file that they are not expected under
# `defaults`.
# NOTE(review): vram_mb is presumably an approximate VRAM requirement in MB,
# and max_duration a clip length in seconds (the MAGNeT "-10secs"/"-30secs"
# variants use 10 and 30) -- confirm with the consumer.

models:
  musicgen:
    enabled: true
    display_name: "MusicGen"
    description: "Text-to-music generation with optional melody conditioning"
    default_variant: medium
    variants:
      small:
        hf_id: facebook/musicgen-small
        vram_mb: 1500
        max_duration: 30
        description: "Fast, lightweight model (300M params)"
      medium:
        hf_id: facebook/musicgen-medium
        vram_mb: 5000
        max_duration: 30
        description: "Balanced quality and speed (1.5B params)"
      large:
        hf_id: facebook/musicgen-large
        vram_mb: 10000
        max_duration: 30
        description: "Highest quality, slower (3.3B params)"
      melody:
        hf_id: facebook/musicgen-melody
        vram_mb: 5000
        max_duration: 30
        conditioning:
          - melody
        description: "Melody-conditioned generation (1.5B params)"
      # Stereo variants: same sizes as above, each with `channels: 2`.
      stereo-small:
        hf_id: facebook/musicgen-stereo-small
        vram_mb: 1800
        max_duration: 30
        channels: 2
        description: "Stereo output, fast (300M params)"
      stereo-medium:
        hf_id: facebook/musicgen-stereo-medium
        vram_mb: 6000
        max_duration: 30
        channels: 2
        description: "Stereo output, balanced (1.5B params)"
      stereo-large:
        hf_id: facebook/musicgen-stereo-large
        vram_mb: 12000
        max_duration: 30
        channels: 2
        description: "Stereo output, highest quality (3.3B params)"
      stereo-melody:
        hf_id: facebook/musicgen-stereo-melody
        vram_mb: 6000
        max_duration: 30
        channels: 2
        conditioning:
          - melody
        description: "Stereo melody-conditioned (1.5B params)"

  audiogen:
    enabled: true
    display_name: "AudioGen"
    description: "Text-to-sound effects generation"
    default_variant: medium
    variants:
      medium:
        hf_id: facebook/audiogen-medium
        vram_mb: 5000
        max_duration: 10
        description: "Sound effects generator (1.5B params)"

  magnet:
    enabled: true
    display_name: "MAGNeT"
    description: "Fast non-autoregressive music generation"
    default_variant: medium-10secs
    variants:
      small-10secs:
        hf_id: facebook/magnet-small-10secs
        vram_mb: 1500
        max_duration: 10
        description: "Fast 10-second clips (300M params)"
      medium-10secs:
        hf_id: facebook/magnet-medium-10secs
        vram_mb: 5000
        max_duration: 10
        description: "Quality 10-second clips (1.5B params)"
      small-30secs:
        hf_id: facebook/magnet-small-30secs
        vram_mb: 1800
        max_duration: 30
        description: "Fast 30-second clips (300M params)"
      medium-30secs:
        hf_id: facebook/magnet-medium-30secs
        vram_mb: 6000
        max_duration: 30
        description: "Quality 30-second clips (1.5B params)"

  musicgen-style:
    enabled: true
    display_name: "MusicGen Style"
    description: "Style-conditioned music generation from reference audio"
    default_variant: medium
    variants:
      medium:
        hf_id: facebook/musicgen-style
        vram_mb: 5000
        max_duration: 30
        conditioning:
          - style
        description: "Style transfer from reference audio (1.5B params)"

  jasco:
    enabled: true
    display_name: "JASCO"
    description: "Chord and drum-conditioned music generation"
    default_variant: chords-drums-400M
    variants:
      chords-drums-400M:
        hf_id: facebook/jasco-chords-drums-400M
        vram_mb: 2000
        max_duration: 10
        conditioning:
          - chords
          - drums
        description: "Chord/drum control, fast (400M params)"
      chords-drums-1B:
        hf_id: facebook/jasco-chords-drums-1B
        vram_mb: 4000
        max_duration: 10
        conditioning:
          - chords
          - drums
        description: "Chord/drum control, higher quality (1B params)"

# Default generation parameters
defaults:
  generation:
    duration: 10
    temperature: 1.0
    top_k: 250
    top_p: 0.0
    cfg_coef: 3.0

# VRAM thresholds for warnings
vram:
  warning_threshold: 0.85  # 85% utilization warning
  critical_threshold: 0.95  # 95% utilization critical

# Presets are loaded from data/presets/*.yaml
presets_dir: "./data/presets"