How are these generated

#1
by rahul7star - opened

How were these extracted? Any scripts you followed that I could play around with would be helpful, thanks.

How were these extracted? Any scripts you followed that I could play around with would be helpful, thanks.

I had the same question. I could not find a script, so I ended up writing one.
You take the single-file checkpoint and convert it with this:

import os
import sys
import safetensors
import safetensors.torch
from accelerate import init_empty_weights
from diffusers import QwenImageTransformer2DModel

# Keyword arguments that main() unpacks into QwenImageTransformer2DModel(...).
# NOTE(review): presumably these mirror the config of the checkpoint being
# converted -- confirm against the model's published transformer config.
TRANSFORMER_CONFIG = {
    "attention_head_dim": 128,
    "axes_dims_rope": [16, 56, 56],
    "guidance_embeds": False,
    "in_channels": 64,
    "joint_attention_dim": 3584,
    "num_attention_heads": 24,
    "num_layers": 60,
    "out_channels": 16,
    "patch_size": 2,
}


# Prefix carried by the transformer weights inside the single-file checkpoint.
_CHECKPOINT_KEY_PREFIX = "model.diffusion_model."


def _strip_key_prefix(state_dict, prefix):
    """Return the entries of *state_dict* whose key starts with *prefix*.

    The prefix is removed from each kept key; entries whose key does not
    start with *prefix* are dropped.
    """
    return {
        key[len(prefix):]: value
        for key, value in state_dict.items()
        if key.startswith(prefix)
    }


def main():
    """Convert a single-file safetensors checkpoint to a Diffusers model folder.

    Reads the input path from ``sys.argv[1]``. Only tensors whose key starts
    with ``model.diffusion_model.`` are kept (with that prefix stripped); they
    are loaded into a ``QwenImageTransformer2DModel`` built from
    ``TRANSFORMER_CONFIG`` and saved with ``save_pretrained`` to a
    ``diffusers`` directory next to the input file.

    Prints a usage message and returns if no input path was given.

    Raises:
        RuntimeError: if the checkpoint contains no key with the expected
            prefix, i.e. there is nothing to convert.
    """
    if len(sys.argv) < 2:
        print("Usage: python convert.py <input_model_path>")
        return
    input_model_path = sys.argv[1]
    output_model_path = os.path.join(os.path.dirname(input_model_path), "diffusers")

    state_dict = safetensors.torch.load_file(input_model_path)
    new_state_dict = _strip_key_prefix(state_dict, _CHECKPOINT_KEY_PREFIX)
    if not new_state_dict:
        # Fail early with an actionable message instead of letting
        # load_state_dict() report every model parameter as missing.
        raise RuntimeError(
            f"No keys starting with {_CHECKPOINT_KEY_PREFIX!r} found in "
            f"{input_model_path}; nothing to convert."
        )

    # Build the model under init_empty_weights(), then hand it the checkpoint
    # tensors directly via assign=True.
    with init_empty_weights():
        model = QwenImageTransformer2DModel(**TRANSFORMER_CONFIG)
    model.load_state_dict(new_state_dict, assign=True)
    model.save_pretrained(output_model_path)
    print(f"Model converted and saved to {output_model_path}")


# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

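This script converts a single-file checkpoint into a Hugging Face Diffusers-compatible QwenImageTransformer2DModel: it keeps only the weights whose keys start with `model.diffusion_model.`, strips that prefix from the key names, and saves the result in the standard Diffusers format in a `diffusers` folder next to the input file.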

Sign up or log in to comment