{
  "last_node_id": 6,
  "last_link_id": 7,
  "nodes": [
    {
      "id": 1,
      "type": "CheckpointLoaderSimple",
      "pos": [100, 100],
      "size": [315, 98],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        { "name": "MODEL", "type": "MODEL", "links": [1] },
        { "name": "CLIP", "type": "CLIP", "links": [2] },
        { "name": "VAE", "type": "VAE", "links": [7] }
      ],
      "properties": { "Node name for S&R": "CheckpointLoaderSimple" },
      "widgets_values": ["ace_step_v1_3.5b.safetensors"],
      "title": "Load ACE Step Checkpoint"
    },
    {
      "id": 2,
      "type": "CLIPTextEncode",
      "pos": [500, 100],
      "size": [400, 200],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        { "name": "clip", "type": "CLIP", "link": 2 }
      ],
      "outputs": [
        { "name": "CONDITIONING", "type": "CONDITIONING", "links": [3] }
      ],
      "properties": { "Node name for S&R": "CLIPTextEncode" },
      "widgets_values": [
        "energetic rock, 140 BPM, electric guitar drums bass, powerful, D minor\n\n[verse]\nBreaking through the walls tonight\nFacing all my fears head on\n[chorus]\nWe rise together, burning bright\nNothing can hold us down for long\n[bridge]\nThrough the fire, through the rain\n[outro]"
      ],
      "title": "Text Prompt (Tags + Lyrics)"
    },
    {
      "id": 3,
      "type": "EmptyLatentAudio",
      "pos": [500, 350],
      "size": [315, 106],
      "flags": {},
      "order": 2,
      "mode": 0,
      "outputs": [
        { "name": "LATENT", "type": "LATENT", "links": [4] }
      ],
      "properties": { "Node name for S&R": "EmptyLatentAudio" },
      "widgets_values": [60, 512, 1],
      "title": "Empty Latent (60 seconds)"
    },
    {
      "id": 4,
      "type": "KSampler",
      "pos": [900, 100],
      "size": [315, 262],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        { "name": "model", "type": "MODEL", "link": 1 },
        { "name": "positive", "type": "CONDITIONING", "link": 3 },
        { "name": "negative", "type": "CONDITIONING", "link": null },
        { "name": "latent_image", "type": "LATENT", "link": 4 }
      ],
      "outputs": [
        { "name": "LATENT", "type": "LATENT", "links": [5] }
      ],
      "properties": { "Node name for S&R": "KSampler" },
      "widgets_values": [123, "randomize", 27, 7.0, "euler", "normal", 1.0],
      "title": "Sampler (27 steps, cfg=7.0)"
    },
    {
      "id": 5,
      "type": "VAEDecode",
      "pos": [1300, 100],
      "size": [210, 46],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        { "name": "samples", "type": "LATENT", "link": 5 },
        { "name": "vae", "type": "VAE", "link": 7 }
      ],
      "outputs": [
        { "name": "AUDIO", "type": "AUDIO", "links": [6] }
      ],
      "properties": { "Node name for S&R": "VAEDecode" },
      "title": "Decode Audio"
    },
    {
      "id": 6,
      "type": "SaveAudio",
      "pos": [1550, 100],
      "size": [315, 58],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        { "name": "audio", "type": "AUDIO", "link": 6 }
      ],
      "properties": { "Node name for S&R": "SaveAudio" },
      "widgets_values": ["acestep_simple_output"],
      "title": "Save Audio"
    }
  ],
  "links": [
    [1, 1, 0, 4, 0, "MODEL"],
    [2, 1, 1, 2, 0, "CLIP"],
    [3, 2, 0, 4, 1, "CONDITIONING"],
    [4, 3, 0, 4, 3, "LATENT"],
    [5, 4, 0, 5, 0, "LATENT"],
    [6, 5, 0, 6, 0, "AUDIO"],
    [7, 1, 2, 5, 1, "VAE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "ACE Step Simple Text-to-Music v1",
      "description": "Basic text-to-music generation using ACE Step with native ComfyUI nodes (60 seconds)",
      "version": "1.0.0",
      "author": "valknar@pivoine.art",
      "category": "text-to-music",
      "tags": ["acestep", "music-generation", "text-to-music", "simple", "60s"],
      "requirements": {
        "models": ["ace_step_v1_3.5b.safetensors (Comfy-Org/ACE-Step_ComfyUI_repackaged)"],
        "vram_min": "8GB",
        "vram_recommended": "16GB",
        "custom_nodes": []
      },
      "usage": {
        "checkpoint": "ace_step_v1_3.5b.safetensors",
        "prompt_format": "Tags + Lyrics with section markers",
        "tags": "style, tempo (BPM), instruments, mood, key/scale",
        "lyrics_format": "[verse], [chorus], [bridge], [outro], [inst] sections",
        "duration": "60 seconds (adjustable via EmptyLatentAudio)",
        "steps": "27 (default, quality-speed balance)",
        "cfg_scale": "7.0 (classifier-free guidance)",
        "seed": "123 or randomize for variation"
      },
      "performance": {
        "generation_time": "~2 seconds on RTX 4090 (30x real-time)",
        "vram_usage": "~8-12GB during generation",
        "quality": "High-quality coherent music with lyric alignment"
      },
      "notes": [
        "ACE Step is 15x faster than LLM baselines with superior structural coherence",
        "Supports 19 languages - use language tags like [en], [zh], [ja], [ko]",
        "For multi-language: Prefix lyrics with language code",
        "BPM range: 60-180, optimal 90-140",
        "Key signatures: Major/minor scales work best",
        "Use [inst] for instrumental sections without lyrics",
        "Increase steps to 35-50 for higher quality (slower)",
        "Lower CFG (4-6) for more creative results; higher CFG (8-10) for closer adherence to the prompt"
      ],
      "examples": {
        "rock_example": {
          "tags": "energetic rock, 140 BPM, electric guitar drums bass, powerful, D minor",
          "lyrics": "[verse]\nBreaking through the walls tonight\n[chorus]\nWe rise together, burning bright"
        },
        "pop_example": {
          "tags": "upbeat pop, 120 BPM, synth piano drums, cheerful happy, C major",
          "lyrics": "[verse]\nSunshine on a summer day\n[chorus]\nDancing all our cares away"
        },
        "ballad_example": {
          "tags": "emotional ballad, 70 BPM, piano strings, melancholic sad, A minor",
          "lyrics": "[verse]\nMemories of yesterday\n[chorus]\nWishing you were here to stay"
        }
      }
    }
  },
  "version": 0.4
}