2025-11-24 16:28:54 +01:00
|
|
|
"""
Pivoine DiffRhythm Node

Custom wrapper for DiffRhythm that ensures correct transformers library version
compatibility and provides fallback fixes for tensor dimension issues.

Known Issue: DiffRhythm requires transformers==4.49.0. Newer versions (4.50+)
cause "The size of tensor a (32) must match the size of tensor b (64)" error
in rotary position embeddings due to transformer block initialization changes.

Reference: https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/44
Reference: https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/48

Author: valknar@pivoine.art
"""
|
|
|
|
|
|
|
|
|
|
import sys
|
|
|
|
|
sys.path.append('/workspace/ComfyUI/custom_nodes/ComfyUI_DiffRhythm')
|
|
|
|
|
|
2025-11-24 17:28:30 +01:00
|
|
|
# Monkey-patch decode_audio from infer_utils to force chunked=False
import infer_utils

# Keep a handle on the unpatched function so the wrapper below can delegate
# to it after infer_utils.decode_audio has been rebound.
_original_decode_audio = infer_utils.decode_audio


def patched_decode_audio(latent, vae_model, chunked=True, **kwargs):
    """Decode ``latent`` with ``vae_model``, always using ``chunked=False``.

    Drop-in replacement for ``infer_utils.decode_audio``.

    Args:
        latent: Forwarded unchanged as the first positional argument.
        vae_model: Forwarded unchanged as the second positional argument.
        chunked: Accepted only for call compatibility; the value is ignored
            and ``False`` is always passed on.
        **kwargs: Any further keyword arguments are passed through untouched,
            so callers that supply extra options to ``decode_audio`` keep
            working instead of failing with ``TypeError`` on the wrapper.

    Returns:
        Whatever the original ``decode_audio`` returns.
    """
    return _original_decode_audio(latent, vae_model, chunked=False, **kwargs)


# Apply the monkey patch
infer_utils.decode_audio = patched_decode_audio
|
2025-11-24 17:24:22 +01:00
|
|
|
|
2025-11-24 16:28:54 +01:00
|
|
|
from DiffRhythmNode import DiffRhythmRun
|
|
|
|
|
|
|
|
|
|
class PivoineDiffRhythmRun(DiffRhythmRun):
    """DiffRhythmRun variant published under the Pivoine audio category.

    Differences from the upstream node:
    - decode_audio is monkey-patched in this module to always run with
      chunked=False for stability
    - Expects transformers==4.49.0 to be installed for compatibility
    - Avoids the tensor dimension mismatch seen in VAE decoding
    - Needs more VRAM (~12-16GB) but works reliably on RTX 4090

    Note: if "tensor a (32) must match tensor b (64)" errors appear, make
    sure transformers==4.49.0 is installed in the ComfyUI venv.
    """

    # Category string attached to this node class.
    CATEGORY = "🌸Pivoine/Audio"

    @classmethod
    def INPUT_TYPES(cls):
        """Return the parent node's input specification unchanged."""
        inherited_inputs = super().INPUT_TYPES()
        return inherited_inputs
|
|
|
|
|
|
|
|
|
|
# Node identifier -> implementing class.
# NOTE(review): presumably consumed by ComfyUI's custom-node loader - confirm.
NODE_CLASS_MAPPINGS = {"PivoineDiffRhythmRun": PivoineDiffRhythmRun}

# Node identifier -> human-readable display name; keyed like the table above.
NODE_DISPLAY_NAME_MAPPINGS = {"PivoineDiffRhythmRun": "Pivoine DiffRhythm Run"}
|