All checks were successful
Build and Push RunPod Docker Image / build-and-push (push) Successful in 14s
- Remove custom PivoineDiffRhythmRun wrapper node
- Add git patch file for ComfyUI_DiffRhythm __init__.py
- Patch adds LlamaConfig fix at import time
- Add arty script 'fix/diffrhythm-patch' to apply patch
- Revert all workflows to use original DiffRhythmRun
- Remove startup_patch.py and revert start.sh

This approach is cleaner and more maintainable than wrapping the node. The patch directly fixes the tensor dimension mismatch (32 vs 64) in DiffRhythm's rotary position embeddings by ensuring num_attention_heads and num_key_value_heads are properly set based on hidden_size.

References:
- https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/44
- https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/48
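The patch file itself is not shown on this page, but the idea it describes can be sketched as follows. This is a minimal illustration only: the helper name, the target head dimension of 64, and the exact way the head counts are derived from hidden_size are assumptions based on the 32-vs-64 mismatch mentioned above, not the actual contents of the patch.

    from transformers import LlamaConfig

    def build_llama_config(hidden_size: int, head_dim: int = 64) -> LlamaConfig:
        # Hypothetical sketch: derive the head counts from hidden_size so that
        # hidden_size / num_attention_heads == head_dim (64 rather than 32),
        # keeping the rotary position embedding dimensions consistent.
        num_heads = hidden_size // head_dim
        return LlamaConfig(
            hidden_size=hidden_size,
            num_attention_heads=num_heads,
            num_key_value_heads=num_heads,
        )

See the linked issues and the patch file in the repository for the actual change applied at import time.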
131 lines
3.9 KiB
JSON
{
  "last_node_id": 3,
  "last_link_id": 2,
  "nodes": [
    {
      "id": 1,
      "type": "DiffRhythmRun",
      "pos": [100, 100],
      "size": [400, 400],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {
          "name": "AUDIO",
          "type": "AUDIO",
          "links": [1, 2]
        }
      ],
      "properties": {
        "Node name for S&R": "DiffRhythmRun"
      },
      "widgets_values": [
        "cfm_full_model.pt",
        "Cinematic orchestral piece with soaring strings, powerful brass, and emotional piano melodies building to an epic crescendo",
        true,
        "euler",
        30,
        4,
        "quality",
        123,
        "randomize",
        false,
        "[-1, 20], [60, -1]"
      ],
      "title": "DiffRhythm Full-Length Text-to-Music (4m45s)"
    },
    {
      "id": 2,
      "type": "PreviewAudio",
      "pos": [600, 100],
      "size": [300, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {
          "name": "audio",
          "type": "AUDIO",
          "link": 1
        }
      ],
      "properties": {
        "Node name for S&R": "PreviewAudio"
      },
      "title": "Preview Audio"
    },
    {
      "id": 3,
      "type": "SaveAudio",
      "pos": [600, 250],
      "size": [300, 100],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {
          "name": "audio",
          "type": "AUDIO",
          "link": 2
        }
      ],
      "properties": {
        "Node name for S&R": "SaveAudio"
      },
      "widgets_values": [
        "diffrhythm_full_output"
      ],
      "title": "Save Audio"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO"],
    [2, 1, 0, 3, 0, "AUDIO"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "DiffRhythm Full-Length Text-to-Music v1",
      "description": "Full-length music generation using DiffRhythm Full (4 minutes 45 seconds)",
      "version": "1.0.0",
      "author": "valknar@pivoine.art",
      "category": "text-to-music",
      "tags": ["diffrhythm", "music-generation", "text-to-music", "full-length", "4m45s"],
      "requirements": {
        "custom_nodes": ["ComfyUI_DiffRhythm"],
        "models": ["ASLP-lab/DiffRhythm-full", "ASLP-lab/DiffRhythm-vae", "OpenMuQ/MuQ-MuLan-large", "OpenMuQ/MuQ-large-msd-iter", "FacebookAI/xlm-roberta-base"],
        "vram_min": "16GB",
        "vram_recommended": "20GB",
        "system_deps": ["espeak-ng"]
      },
      "usage": {
        "model": "cfm_full_model.pt (DiffRhythm Full - 4m45s/285s generation)",
        "style_prompt": "Detailed text description of the desired full-length music composition",
        "unload_model": "Boolean to unload model after generation (default: true)",
        "odeint_method": "ODE solver: euler, midpoint, rk4, implicit_adams (default: euler)",
        "steps": "Number of diffusion steps: 1-100 (default: 30)",
        "cfg": "Classifier-free guidance scale: 1-10 (default: 4)",
        "quality_or_speed": "Generation mode: quality or speed (default: quality for full-length)",
        "seed": "Random seed for reproducibility (default: 123)",
        "edit": "Enable segment editing mode (default: false)",
        "edit_segments": "Segments to edit when edit=true"
      },
      "performance": {
        "generation_time": "~60-90 seconds on RTX 4090",
        "vram_usage": "~16GB during generation",
        "note": "Significantly faster than real-time music generation"
      },
      "notes": [
        "This workflow uses DiffRhythm Full for 4 minute 45 second music generation",
        "Best for complete song compositions with intro, development, and outro",
        "All parameters except model and style_prompt are optional",
        "Supports complex, multi-part compositions",
        "Can optionally connect MultiLineLyricsDR node for lyrics input"
      ]
    }
  },
  "version": 0.4
}
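For anyone inspecting the graph by hand, the connection topology lives in the links array, where each entry in ComfyUI's editor format is [link_id, from_node, from_slot, to_node, to_slot, type]. A short script like the sketch below prints it in readable form; the file name is an assumption, so point it at wherever this workflow JSON is saved.

    import json

    # Hypothetical file name; use the actual path of this workflow JSON.
    with open("diffrhythm_full_text_to_music.json") as f:
        wf = json.load(f)

    # Map node ids to their display titles (falling back to the node type).
    titles = {n["id"]: n.get("title", n["type"]) for n in wf["nodes"]}

    # Each link is [link_id, from_node, from_slot, to_node, to_slot, type].
    for link_id, src, src_slot, dst, dst_slot, ltype in wf["links"]:
        print(f"link {link_id}: {titles[src]}[{src_slot}] -> {titles[dst]}[{dst_slot}] ({ltype})")

Run against this file, it shows the DiffRhythmRun AUDIO output feeding both the Preview Audio and Save Audio nodes.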
|