How are these generated
#1
by
rahul7star
- opened
How were these extracted? Any scripts you followed that I could play around with would be helpful, thanks.
How were these extracted? Any scripts you followed that I could play around with would be helpful, thanks.
I had the same question, but I could not find a script, so I ended up writing one.
You take the single-file checkpoint and convert it using this:
import os
import sys
import safetensors
import safetensors.torch
from accelerate import init_empty_weights
from diffusers import QwenImageTransformer2DModel
# Keyword arguments handed to QwenImageTransformer2DModel(...) in main().
# NOTE(review): the values presumably mirror the config of the published
# transformer this checkpoint belongs to -- confirm before reusing the script
# for a different model variant.
TRANSFORMER_CONFIG = dict(
    attention_head_dim=128,
    axes_dims_rope=[16, 56, 56],
    guidance_embeds=False,
    in_channels=64,
    joint_attention_dim=3584,
    num_attention_heads=24,
    num_layers=60,
    out_channels=16,
    patch_size=2,
)
def main():
    """Convert a single-file checkpoint into a Diffusers transformer folder.

    Reads the input checkpoint path from ``sys.argv[1]``, keeps only the
    tensors whose key starts with ``model.diffusion_model.`` (with that
    prefix removed; every other key is dropped), loads them into a
    ``QwenImageTransformer2DModel`` built from ``TRANSFORMER_CONFIG`` and
    writes the result with ``save_pretrained`` into a ``diffusers``
    directory next to the input file.

    Raises:
        SystemExit: with a non-zero status when no input path is given or
            when no key in the checkpoint carries the expected prefix.
    """
    if len(sys.argv) < 2:
        # Passing a string to sys.exit prints it to stderr and exits with
        # status 1, so a calling shell can tell the conversion did not run.
        sys.exit("Usage: python convert.py <input_model_path>")

    input_model_path = sys.argv[1]
    output_model_path = os.path.join(os.path.dirname(input_model_path), "diffusers")

    # Read and validate the checkpoint first so bad input fails before the
    # model is constructed.
    state_dict = safetensors.torch.load_file(input_model_path)

    # Keep only keys under "model.diffusion_model." and strip that prefix;
    # keys without it are ignored.
    prefix = "model.diffusion_model."
    new_state_dict = {
        key[len(prefix):]: value
        for key, value in state_dict.items()
        if key.startswith(prefix)
    }
    if not new_state_dict:
        # Without this check the failure would only surface later, inside
        # load_state_dict, with no hint that the key prefix is the cause.
        sys.exit(
            f"No keys starting with '{prefix}' found in {input_model_path}; "
            "nothing to convert."
        )

    # The model is created under init_empty_weights() and then filled from
    # the checkpoint; assign=True makes load_state_dict attach the loaded
    # tensors to the module (see the accelerate / PyTorch documentation).
    with init_empty_weights():
        model = QwenImageTransformer2DModel(**TRANSFORMER_CONFIG)
    model.load_state_dict(new_state_dict, assign=True)
    model.save_pretrained(output_model_path)
    print(f"Model converted and saved to {output_model_path}")


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()
This script converts a raw diffusion transformer checkpoint into a Hugging Face Diffusers-compatible QwenImageTransformer2DModel by stripping the `model.diffusion_model.` prefix from its weight keys and saving the result in the standard Diffusers format.