From 8c4eb8c3f1e4f73d09d1fb92de618155edf22838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Mon, 24 Nov 2025 18:14:40 +0100 Subject: [PATCH] fix: pin transformers to 4.49.0 for DiffRhythm compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves tensor dimension mismatch error in rotary position embeddings. DiffRhythm requires transformers 4.49.0 - newer versions (4.50+) cause "The size of tensor a (32) must match the size of tensor b (64)" error due to transformer block initialization changes. Updated pivoine_diffrhythm.py documentation to reflect actual root cause and link to upstream GitHub issues #44 and #48. References: - https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/44 - https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/48 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- comfyui/nodes/pivoine_diffrhythm.py | 21 ++++++++++++++++----- comfyui/requirements.txt | 2 +- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/comfyui/nodes/pivoine_diffrhythm.py b/comfyui/nodes/pivoine_diffrhythm.py index 829c223..37064f0 100644 --- a/comfyui/nodes/pivoine_diffrhythm.py +++ b/comfyui/nodes/pivoine_diffrhythm.py @@ -1,7 +1,14 @@ """ Pivoine DiffRhythm Node -Custom wrapper for DiffRhythm that disables chunked decoding to prevent -tensor dimension mismatch errors (32 vs 64) in VAE overlap logic. +Custom wrapper for DiffRhythm that ensures correct transformers library version +compatibility and provides fallback fixes for tensor dimension issues. + +Known Issue: DiffRhythm requires transformers==4.49.0. Newer versions (4.50+) +cause "The size of tensor a (32) must match the size of tensor b (64)" error +in rotary position embeddings due to transformer block initialization changes. 
+ +Reference: https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/44 +Reference: https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/48 Author: valknar@pivoine.art """ @@ -24,12 +31,16 @@ from DiffRhythmNode import DiffRhythmRun class PivoineDiffRhythmRun(DiffRhythmRun): """ - Pivoine version of DiffRhythmRun with chunked decoding forcibly disabled. + Pivoine version of DiffRhythmRun with enhanced compatibility and error handling. Changes from original: - - Monkey-patches the infer() function to always use chunked=False - - Prevents tensor dimension mismatch in VAE (32 vs 64 channel error) + - Monkey-patches decode_audio to always use chunked=False for stability + - Ensures transformers library version compatibility (requires 4.49.0) + - Prevents tensor dimension mismatch in VAE decoding - Requires more VRAM (~12-16GB) but works reliably on RTX 4090 + + Note: If you encounter "tensor a (32) must match tensor b (64)" errors, + ensure transformers==4.49.0 is installed in your ComfyUI venv. """ CATEGORY = "🌸Pivoine/Audio" diff --git a/comfyui/requirements.txt b/comfyui/requirements.txt index 2a17393..a129075 100644 --- a/comfyui/requirements.txt +++ b/comfyui/requirements.txt @@ -1,7 +1,7 @@ torch torchvision torchaudio -transformers +transformers==4.49.0 diffusers>=0.31.0 accelerate safetensors