Merge pull request #34 from EtienneDosSantos/dev

Dev
IndigoDosSantos · Mar 20, 2024 · cae9478 · cae9478
2 parents 42efb0f + be4c541
commit cae9478
Show file tree

Hide file tree

Showing 7 changed files with 40 additions and 23 deletions.
diff --git a/assets/charts/chart_dtype_VRAM_footprint_compared.png b/assets/charts/chart_dtype_VRAM_footprint_compared.png
diff --git a/assets/charts/chart_dtype_inference_and_loading_speeds_compared.png b/assets/charts/chart_dtype_inference_and_loading_speeds_compared.png
diff --git a/assets/dtype_comparison_two_images.jpg b/assets/dtype_comparison_two_images.jpg
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -1,3 +1,14 @@
+### 20.03.2024
+
+* Set guidance_scale (decoder) to 1.9 and num_inference_steps to 54 for optimal image quality.
+* **Key finding:** Using torch.bfloat16 for the decoder significantly increased model loading speed (3.24x faster) compared to torch.float16. Other performance metrics remained virtually unchanged, and surprisingly, there was no perceptible difference in image quality (see [Figure 1](https://github.com/EtienneDosSantos/stable-cascade-one-click-installer/blob/dev/assets/dtype_comparison_two_images.jpg)).
+  * **Charts:** I've created two charts visualizing these results (see [Figure 2](https://github.com/EtienneDosSantos/stable-cascade-one-click-installer/blob/dev/assets/charts/chart_dtype_inference_and_loading_speeds_compared.png), [Figure 3](https://github.com/EtienneDosSantos/stable-cascade-one-click-installer/blob/dev/assets/charts/chart_dtype_VRAM_footprint_compared.png)).
+
+### 19.03.2024
+
+* **[PR #7381:](https://github.com/huggingface/diffusers/pull/7381)**
+  * Fixed the bug so we can generate multiple images simultaneously – thx [@DN6](https://github.com/DN6)! 🎉
+
 ### 17.03.2024
 
 * **[PR #31:](https://github.com/EtienneDosSantos/stable-cascade-one-click-installer/commit/e84010c83daa126b10cecae584cb8a4979689528)**

diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md
@@ -1,25 +1,34 @@
 ### Features to Add:
 
-**1. Image Metadata Storage**
+**3. Test Decoder Dtype Influence** ✔️
+
+* **`torch.bfloat16` vs. `torch.float16`:**
+    - [x] VRAM footprint
+    - [x] Inference speed
+    - [x] Image quality
+
+**2. Batch Size Fix (>1)** ✔️
+
+* **Goal:** Restore the ability to generate multiple images per prompt.
+    - [x] Not getting anywhere, opened [issue #7377](https://github.com/huggingface/diffusers/issues/7377) to hopefully get this resolved.
+        - [x] **Issue Review:** Test provided solution to issue ([PR #7381](https://github.com/huggingface/diffusers/pull/7381))! Amazing work, thx [@DN6](https://github.com/DN6)! 🎉
+* **Troubleshooting Steps:**
+    - [ ] **Error Analysis:** Identify the specific error or unexpected behavior.
+    - [ ] **Code Review:** Examine logic related to batch size handling.
+    - [ ] **Dependency Check:** Ensure compatibility between any updated libraries and the batching functionality.
+
+**1. Image Metadata Storage** ✔️
 
 * **Goal:** Embed essential generation parameters within generated images for reproducibility and analysis.
 * **Metadata to Include:**
     - [x] Seed
     - [x] Number of steps
     - [x] Model name
     - [x] CFG value
-    - [ ] Sampler
+    - [x] Sampler
     - [x] Prompt
 
 * **Implementation Steps:**
     - **Library Selection:** Research image metadata libraries (e.g., ExifWrite, PIL/Pillow).
     - **Integration:** Modify image generation code to write metadata.
     - **Testing:** Verify metadata is written and readable.
-
-**2. Batch Size Fix (>1)**
-
-* **Goal:** Restore the ability to generate multiple images per prompt.
-* **Troubleshooting Steps:**
-    - [ ] **Error Analysis:** Identify the specific error or unexpected behavior.
-    - [ ] **Code Review:** Examine logic related to batch size handling.
-    - [ ] **Dependency Check:** Ensure compatibility between any updated libraries and the batching functionality.
diff --git a/requirements.txt b/requirements.txt
@@ -11,7 +11,7 @@
 
 --find-links https://download.pytorch.org/whl/torch_stable.html
 accelerate>=0.25.0
-diffusers==0.27.0
+diffusers==0.27.2
 einops>=0.7.0
 gradio
 kornia>=0.7.0

diff --git a/run.py b/run.py
@@ -6,7 +6,7 @@
 # Stability AI Non-Commercial Research Community License Agreement, dated November 28, 2023.
 # For more information, see https://stability.ai/use-policy.
 
-from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline
+from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline, StableCascadeUNet
 import gradio as gr
 import json
 import os
@@ -24,9 +24,9 @@
 def load_model(model_name):
     # Load model from disk every time it's needed
     if model_name == "prior":
-        model = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=dtype).to(device)
+        model = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=dtype, use_safetensors=True).to(device)
     elif model_name == "decoder":
-        model = StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.float16).to(device)
+        model = StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", variant="bf16", torch_dtype=dtype, use_safetensors=True).to(device)
     else:
         raise ValueError(f"Unknown model name: {model_name}")
     return model
@@ -79,26 +79,23 @@ def generate_images(prompt, height, width, negative_prompt, guidance_scale, num_
         num_images_per_prompt=int(num_images_per_prompt),
         generator=generator,
     )
-    del prior  # Explicitly delete the model to help with memory management
-    torch.cuda.empty_cache()  # Clear the CUDA cache to free up unused memory
 
     # Load, use, and discard the decoder model
     decoder = load_model("decoder")
     decoder.enable_model_cpu_offload()
     decoder_output = decoder(
-        image_embeddings=prior_output.image_embeddings.to(torch.float16),
+        image_embeddings=prior_output.image_embeddings.to(dtype),
         prompt=cleaned_prompt,
         negative_prompt=negative_prompt,
-        guidance_scale=0.0,
+        guidance_scale=1.9, # Guidance scale is enabled by setting guidance_scale > 1
         num_inference_steps=calculated_steps_decoder,
         output_type="pil",
         generator=generator,
     ).images
-    del decoder  # Explicitly delete the model to help with memory management
-    torch.cuda.empty_cache()  # Clear the CUDA cache to free up unused memory
-
+
     metadata_embedded = {
      "parameters": "Stable Cascade",
+     "scheduler": "DDPMWuerstchenScheduler",
      "prompt": cleaned_prompt,
      "negative_prompt": negative_prompt,
      "width": int(width),
@@ -190,8 +187,8 @@ def configure_ui():
                 height = gr.Slider(minimum=512, maximum=2048, step=1, value=1024, label="Image Height")
             with gr.Column():
                 # components in central column
-                num_inference_steps = gr.Slider(minimum=1, maximum=150, step=1, value=30, label="Steps")
-                num_images_per_prompt = gr.Number(label="Number of Images per Prompt (Currently, the system can only generate one image at a time. Please leave the 'Images per Prompt' setting at 1 until this issue is fixed.)", value=1)
+                num_inference_steps = gr.Slider(minimum=1, maximum=150, step=1, value=54, label="Steps")
+                num_images_per_prompt = gr.Number(label="Number of Images per Prompt", value=2)
             with gr.Column():
                 # components in right column
                 guidance_scale = gr.Slider(minimum=1, maximum=20, step=0.5, value=4.0, label="Guidance Scale")