diff --git a/comfyui/workflows/text-to-image/sd35-large-t2i-production-v1.json b/comfyui/workflows/text-to-image/sd35-large-t2i-production-v1.json
index 79d32c3..17cbe61 100644
--- a/comfyui/workflows/text-to-image/sd35-large-t2i-production-v1.json
+++ b/comfyui/workflows/text-to-image/sd35-large-t2i-production-v1.json
@@ -1,12 +1,12 @@
 {
-  "last_node_id": 12,
-  "last_link_id": 15,
+  "last_node_id": 9,
+  "last_link_id": 10,
   "nodes": [
     {
       "id": 1,
       "type": "CheckpointLoaderSimple",
       "pos": [50, 100],
-      "size": {"0": 350, "1": 100},
+      "size": [350, 100],
       "flags": {},
       "order": 0,
       "mode": 0,
@@ -20,8 +20,7 @@
         {
           "name": "CLIP",
           "type": "CLIP",
-          "links": [2, 3],
-          "slot_index": 1
+          "links": null
         },
         {
           "name": "VAE",
@@ -36,19 +35,41 @@
       "widgets_values": ["sd3.5_large.safetensors"],
       "title": "SD3.5 Large Checkpoint Loader"
     },
+    {
+      "id": 9,
+      "type": "TripleCLIPLoader",
+      "pos": [50, 250],
+      "size": [350, 150],
+      "flags": {},
+      "order": 1,
+      "mode": 0,
+      "outputs": [
+        {
+          "name": "CLIP",
+          "type": "CLIP",
+          "links": [9, 10],
+          "slot_index": 0
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "TripleCLIPLoader"
+      },
+      "widgets_values": ["clip_l.safetensors", "clip_g.safetensors", "t5xxl_fp16.safetensors"],
+      "title": "SD3.5 CLIP Loaders (L+G+T5)"
+    },
     {
       "id": 2,
       "type": "CLIPTextEncode",
       "pos": [450, 100],
-      "size": {"0": 400, "1": 200},
+      "size": [400, 200],
       "flags": {},
-      "order": 1,
+      "order": 2,
       "mode": 0,
       "inputs": [
         {
           "name": "clip",
           "type": "CLIP",
-          "link": 2
+          "link": 9
         }
       ],
       "outputs": [
@@ -69,15 +90,15 @@
       "id": 3,
       "type": "CLIPTextEncode",
       "pos": [450, 350],
-      "size": {"0": 400, "1": 200},
+      "size": [400, 200],
       "flags": {},
-      "order": 2,
+      "order": 3,
       "mode": 0,
       "inputs": [
         {
           "name": "clip",
           "type": "CLIP",
-          "link": 3
+          "link": 10
         }
       ],
       "outputs": [
@@ -98,9 +119,9 @@
       "id": 4,
       "type": "EmptyLatentImage",
       "pos": [450, 600],
-      "size": {"0": 315, "1": 106},
+      "size": [315, 106],
       "flags": {},
-      "order": 3,
+      "order": 4,
       "mode": 0,
       "outputs": [
         {
@@ -120,9 +141,9 @@
       "id": 5,
       "type": "KSampler",
       "pos": [900, 100],
-      "size": {"0": 315, "1": 474},
+      "size": [315, 474],
       "flags": {},
-      "order": 4,
+      "order": 5,
       "mode": 0,
       "inputs": [
         {
@@ -157,24 +178,16 @@
       "properties": {
         "Node name for S&R": "KSampler"
       },
-      "widgets_values": [
-        42,
-        "fixed",
-        28,
-        4.5,
-        "dpmpp_2m",
-        "karras",
-        1
-      ],
+      "widgets_values": [42, "fixed", 28, 4.5, "dpmpp_2m", "karras", 1],
       "title": "SD3.5 Sampler (28 steps)"
     },
     {
       "id": 6,
       "type": "VAEDecode",
       "pos": [1270, 100],
-      "size": {"0": 210, "1": 46},
+      "size": [210, 46],
       "flags": {},
-      "order": 5,
+      "order": 6,
       "mode": 0,
       "inputs": [
         {
@@ -192,7 +205,7 @@
         {
           "name": "IMAGE",
           "type": "IMAGE",
-          "links": [9, 10],
+          "links": [2, 3],
           "slot_index": 0
         }
       ],
@@ -205,15 +218,15 @@
       "id": 7,
       "type": "PreviewImage",
       "pos": [1530, 100],
-      "size": {"0": 400, "1": 400},
+      "size": [400, 400],
       "flags": {},
-      "order": 6,
+      "order": 7,
       "mode": 0,
       "inputs": [
         {
           "name": "images",
           "type": "IMAGE",
-          "link": 9
+          "link": 2
         }
       ],
       "properties": {
@@ -225,15 +238,15 @@
       "id": 8,
       "type": "SaveImage",
       "pos": [1530, 550],
-      "size": {"0": 400, "1": 100},
+      "size": [400, 100],
       "flags": {},
-      "order": 7,
+      "order": 8,
       "mode": 0,
       "inputs": [
         {
           "name": "images",
           "type": "IMAGE",
-          "link": 10
+          "link": 3
         }
       ],
       "properties": {
@@ -245,28 +258,27 @@
   ],
   "links": [
     [1, 1, 0, 5, 0, "MODEL"],
-    [2, 1, 1, 2, 0, "CLIP"],
-    [3, 1, 1, 3, 0, "CLIP"],
+    [2, 6, 0, 7, 0, "IMAGE"],
+    [3, 6, 0, 8, 0, "IMAGE"],
     [4, 1, 2, 6, 1, "VAE"],
     [5, 2, 0, 5, 1, "CONDITIONING"],
     [6, 3, 0, 5, 2, "CONDITIONING"],
     [7, 4, 0, 5, 3, "LATENT"],
     [8, 5, 0, 6, 0, "LATENT"],
-    [9, 6, 0, 7, 0, "IMAGE"],
-    [10, 6, 0, 8, 0, "IMAGE"]
+    [9, 9, 0, 2, 0, "CLIP"],
+    [10, 9, 0, 3, 0, "CLIP"]
   ],
   "groups": [],
   "config": {},
   "extra": {
     "workflow_info": {
       "name": "Stable Diffusion 3.5 Large Text-to-Image Production",
-      "version": "1.0.0",
-      "author": "RunPod AI Model Orchestrator",
+      "version": "1.2.0",
       "description": "Latest generation text-to-image using Stable Diffusion 3.5 Large (28 steps). Provides excellent photorealism and prompt adherence.",
       "category": "text-to-image",
       "tags": ["sd3.5", "stable-diffusion", "large", "production", "t2i", "photorealistic"],
       "requirements": {
-        "models": ["stable-diffusion-3.5-large"],
+        "models": ["SD3.5-large", "CLIP-L", "CLIP-G", "T5-XXL FP16"],
         "custom_nodes": [],
         "vram_min": "18GB",
         "vram_recommended": "24GB"
@@ -351,7 +363,13 @@
         "avg_generation_time": "35-45 seconds",
         "vram_usage": "~18-20GB",
         "gpu_utilization": "95-100%"
-      }
+      },
+      "notes": [
+        "SD3.5 uses CheckpointLoaderSimple for MODEL+VAE",
+        "TripleCLIPLoader loads CLIP-L, CLIP-G, and T5-XXL separately",
+        "Checkpoint file doesn't include CLIP encoders",
+        "All three CLIP encoders required for best quality"
+      ]
     }
   },
   "version": 0.4