# runpod/comfyui/nodes/pivoine_diffrhythm.py

"""
Pivoine DiffRhythm Node
Custom wrapper for DiffRhythm that documents the required transformers library
version and applies a fallback fix (non-chunked audio decoding) for tensor
dimension issues.

Known Issue: DiffRhythm requires transformers==4.49.0. Newer versions (4.50+)
cause "The size of tensor a (32) must match the size of tensor b (64)" error
in rotary position embeddings due to transformer block initialization changes.

Reference: https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/44
Reference: https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/48

Author: valknar@pivoine.art
"""

import sys

# Directory of the upstream ComfyUI_DiffRhythm custom node. Its top-level
# modules (infer_utils, DiffRhythmNode) are imported by plain name further
# down, so the directory has to be on sys.path first.
_DIFFRHYTHM_NODE_DIR = '/workspace/ComfyUI/custom_nodes/ComfyUI_DiffRhythm'

# Only append when missing: executing this module again (reload / re-import)
# would otherwise add a duplicate sys.path entry each time.
if _DIFFRHYTHM_NODE_DIR not in sys.path:
    sys.path.append(_DIFFRHYTHM_NODE_DIR)

# Monkey-patch decode_audio from infer_utils to force chunked=False
import infer_utils

# Keep a handle on the upstream implementation so the wrapper can delegate.
_original_decode_audio = infer_utils.decode_audio


def patched_decode_audio(latent, vae_model, chunked=True, **kwargs):
    """Call the original ``decode_audio`` with ``chunked`` forced to ``False``.

    Args:
        latent: Forwarded unchanged as the first positional argument.
        vae_model: Forwarded unchanged as the second positional argument.
        chunked: Accepted so existing call sites keep working, but ignored;
            the original is always invoked with ``chunked=False``.
        **kwargs: Any additional keyword arguments are forwarded untouched so
            that callers passing extra options to ``decode_audio`` do not get
            a ``TypeError`` from this wrapper (presumably chunking options
            such as overlap / chunk size -- verify against upstream).

    Returns:
        Whatever the original ``infer_utils.decode_audio`` returns.
    """
    return _original_decode_audio(latent, vae_model, chunked=False, **kwargs)


# Apply the monkey patch.
# NOTE: this rebinds the attribute on the infer_utils module only. Code that
# executed ``from infer_utils import decode_audio`` *before* this line keeps
# its reference to the unpatched function, so the patch has to be installed
# before such modules are first imported.
infer_utils.decode_audio = patched_decode_audio

from DiffRhythmNode import DiffRhythmRun

class PivoineDiffRhythmRun(DiffRhythmRun):
    """
    DiffRhythmRun subclass exposed under the Pivoine audio category.

    The class body adds no inference logic of its own: it overrides
    ``CATEGORY`` and re-exposes the parent's ``INPUT_TYPES`` unchanged.
    The stability workaround (forcing ``chunked=False`` in
    ``infer_utils.decode_audio``) is installed at module level, before
    ``DiffRhythmRun`` is imported, rather than by this class.

    Operational notes carried over from the original author (none of these
    are enforced by code in this file):
    - transformers==4.49.0 must be installed in the ComfyUI venv; with newer
      versions a "tensor a (32) must match tensor b (64)" error is reported.
    - Non-chunked decoding reportedly needs more VRAM (~12-16GB) and was
      reported to work reliably on an RTX 4090.
    """

    # Category string for this node; differs from the parent only in value.
    CATEGORY = "🌸Pivoine/Audio"

    @classmethod
    def INPUT_TYPES(cls):
        """Return the parent class's input specification as-is."""
        inherited_inputs = super().INPUT_TYPES()
        return inherited_inputs

# Module-level registries (presumably read by ComfyUI's custom-node loader --
# confirm against the ComfyUI docs). Both dicts share the same node id key.

# Node id -> implementing class.
NODE_CLASS_MAPPINGS = {"PivoineDiffRhythmRun": PivoineDiffRhythmRun}

# Node id -> human-readable display name.
NODE_DISPLAY_NAME_MAPPINGS = {"PivoineDiffRhythmRun": "Pivoine DiffRhythm Run"}