This repository has been archived by the owner on May 14, 2024. It is now read-only.

Aspect ratio bucketing in SDXL lora training #343

Open
ArtLeav opened this issue Mar 8, 2024 · 3 comments

Comments

@ArtLeav

ArtLeav commented Mar 8, 2024

Hello! I have a question/problem: how can I make sure that image resizing does not shrink the longer side of every image down to 1024 pixels, but instead keeps bucket resolutions such as 768x1344? I tried making bucket_resolution higher (up to 2048), but it doesn't help.

(screenshot attached)
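
For context: as far as I understand, kohya-style bucketing constrains the bucket list both by a total pixel-area budget (taken from max_resolution) and by min_bucket_reso/max_bucket_reso, so raising max_resolution alone never produces a side longer than max_bucket_reso. A rough sketch of that enumeration (not the actual sd-scripts code; the defaults below are just illustrative):

def enumerate_buckets(max_resolution=1024, reso_steps=64,
                      min_bucket_reso=256, max_bucket_reso=1024):
    # Enumerate (width, height) pairs whose area stays within max_resolution^2
    # and whose sides stay inside [min_bucket_reso, max_bucket_reso].
    max_area = max_resolution * max_resolution
    buckets = set()
    width = min_bucket_reso
    while width <= max_bucket_reso:
        # Largest height (multiple of reso_steps) that still fits the area budget.
        height = (max_area // width) // reso_steps * reso_steps
        height = min(max(height, min_bucket_reso), max_bucket_reso)
        if width * height <= max_area:
            buckets.add((width, height))
            buckets.add((height, width))
        width += reso_steps
    return sorted(buckets)

print(enumerate_buckets())                      # no side ever exceeds 1024
print(enumerate_buckets(max_bucket_reso=1536))  # now 768x1344 (and 1344x768) appear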

@ArtLeav
Author

ArtLeav commented Mar 8, 2024

My attempt to solve it looks like this:


# @title ## **3.4. Bucketing and Latents Caching**
%store -r

# @markdown This code will create buckets based on the `bucket_resolution` provided for multi-aspect ratio training, and then convert all images within the `train_data_dir` to latents.
bucketing_json    = os.path.join(training_dir, "meta_lat.json")
metadata_json     = os.path.join(training_dir, "meta_clean.json")
bucket_resolution = 1344  # @param {type:"slider", min:512, max:2048, step:64}
bucket_reso_steps = 32  # @param {type:"slider", min:32, max:2048, step:32}
min_bucket_reso = 512  # @param {type:"slider", min:512, max:1600, step:64}
max_bucket_reso = 1536  # @param {type:"slider", min:512, max:1600, step:64}
mixed_precision   = "no"  # @param ["no", "fp16", "bf16"] {allow-input: false}
skip_existing     = False  # @param{type:"boolean"}
flip_aug          = False  # @param{type:"boolean"}
# @markdown Use `clean_caption` option to clean such as duplicate tags, `women` to `girl`, etc
clean_caption     = True #@param {type:"boolean"}
#@markdown Use the `recursive` option to process subfolders as well
recursive         = True #@param {type:"boolean"}

metadata_config = {
    "_train_data_dir": train_data_dir,
    "_out_json": metadata_json,
    "recursive": recursive,
    "full_path": recursive,
    "clean_caption": clean_caption
}

bucketing_config = {
    "min_bucket_reso": f"{min_bucket_reso}",
    "max_bucket_reso": f"{max_bucket_reso}",
    "bucket_reso_steps": bucket_reso_steps,  # was defined above but never passed on
    "_train_data_dir": train_data_dir,
    "_in_json": metadata_json,
    "_out_json": bucketing_json,
    "_model_name_or_path": vae_path if vae_path else model_path,
    "recursive": recursive,
    "full_path": recursive,
    "flip_aug": flip_aug,
    "skip_existing": skip_existing,
    "batch_size": 1,
    "max_data_loader_n_workers": 2,
    "max_resolution": f"{bucket_resolution}, {bucket_resolution}",
    "mixed_precision": mixed_precision,
}

def generate_args(config):
    # Keys prefixed with "_" become positional arguments; True booleans become bare flags.
    args = ""
    for k, v in config.items():
        if k.startswith("_"):
            args += f'"{v}" '
        elif isinstance(v, str):
            args += f'--{k}="{v}" '
        elif isinstance(v, bool) and v:
            args += f"--{k} "
        elif isinstance(v, float) and not isinstance(v, bool):
            args += f"--{k}={v} "
        elif isinstance(v, int) and not isinstance(v, bool):
            args += f"--{k}={v} "
    return args.strip()

merge_metadata_args = generate_args(metadata_config)
prepare_buckets_args = generate_args(bucketing_config)

merge_metadata_command = f"python merge_all_to_metadata.py {merge_metadata_args}"
prepare_buckets_command = f"python prepare_buckets_latents.py {prepare_buckets_args}"

os.chdir(finetune_dir)
!{merge_metadata_command}
time.sleep(1)
!{prepare_buckets_command}

but it upscales 1024x1024 images to 1536.
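
That upscale is expected once max_resolution is raised above the source size: as far as I can tell, each image is scaled to fill its assigned bucket, and a 1344x1344 (or larger) bucket forces a 1024x1024 source to be scaled up. A rough sketch of that fitting step (not the actual prepare_buckets_latents.py logic); the bucket_no_upscale flag mentioned in the next comment is what skips this upscale.

def fit_to_bucket(img_w, img_h, bucket_w, bucket_h):
    # Scale so the image covers the bucket; the excess would then be cropped away.
    scale = max(bucket_w / img_w, bucket_h / img_h)
    return round(img_w * scale), round(img_h * scale), scale

print(fit_to_bucket(1024, 1024, 1344, 1344))  # -> (1344, 1344, 1.3125): scale > 1 means upscaling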

@ArtLeav
Author

ArtLeav commented Mar 8, 2024

I made a simple script to resize pictures, but I don't know how to hook it into the latent/bucket preparation:


from PIL import Image
import os

src_dir = r'C:\Path\to\Images\Input'
dst_dir = r'C:\Path\to\Images\Output'

quality_val = 100

resolutions = [(1024, 1024), (896, 1152), (832, 1216), (768, 1344), (640, 1536), (1152, 896), (1216, 832), (1344, 768), (1536, 640)]

def resize_and_crop(img, size):
    # Resize to cover the target size while keeping the aspect ratio, then center-crop.
    img_ratio = img.size[0] / float(img.size[1])
    ratio = size[0] / float(size[1])

    if ratio > img_ratio:
        # Target is relatively wider: match the width, crop the excess height.
        img = img.resize((size[0], int(size[0] * img.size[1] / img.size[0])), Image.LANCZOS)
        top = (img.size[1] - size[1]) // 2
        img = img.crop((0, top, img.size[0], top + size[1]))
    elif ratio < img_ratio:
        # Target is relatively taller: match the height, crop the excess width.
        img = img.resize((int(size[1] * img.size[0] / img.size[1]), size[1]), Image.LANCZOS)
        left = (img.size[0] - size[0]) // 2
        img = img.crop((left, 0, left + size[0], img.size[1]))
    else:
        img = img.resize((size[0], size[1]), Image.LANCZOS)
    return img

def closest(lst, K):
    # Pick the resolution whose aspect ratio is closest to K.
    return lst[min(range(len(lst)), key=lambda i: abs(lst[i][0] / lst[i][1] - K))]

for filename in os.listdir(src_dir):
    if filename.endswith(('.jpg', '.png', '.jpeg')):  # add file types as needed
        img = Image.open(os.path.join(src_dir, filename))
        closest_res = closest(resolutions, img.size[0]/img.size[1])
        new_img = resize_and_crop(img, closest_res)
        if new_img.mode == 'RGBA':
            new_img = new_img.convert('RGB')
        new_filename = f'{os.path.splitext(filename)[0]}.jpg'
        new_img.save(os.path.join(dst_dir, new_filename), quality=quality_val)
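
A quick sanity check of the mapping, continuing with the resolutions list and closest() helper from the script above (the example size is made up):

# A 3000x2000 source is landscape with ratio 1.5, so of the nine SDXL buckets
# the nearest by aspect ratio is 1216x832 (ratio about 1.46).
print(closest(resolutions, 3000 / 2000))  # -> (1216, 832)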

@Taikakim

I just set the bucket_no_upscale flag, which fixed it, and changed the max reso just in case:

The weird thing was that if my training resolution was set to 1472, for example, I started getting really bad results after just a few hundred steps. So for now I'm keeping the training resolution at 1024px and the output is fine. But I'm not sure what the code is doing internally now with images that are larger.

# @title ## **3.4. Bucketing and Latents Caching**
%store -r

# @markdown This code will create buckets based on the `bucket_resolution` provided for multi-aspect ratio training, and then convert all images within the `train_data_dir` to latents.
bucketing_json    = os.path.join(training_dir, "meta_lat.json")
metadata_json     = os.path.join(training_dir, "meta_clean.json")
bucket_resolution = 1472  # @param {type:"slider", min:512, max:2048, step:32}
mixed_precision   = "bf16"  # @param ["no", "fp16", "bf16"] {allow-input: false}
flip_aug          = False  # @param{type:"boolean"}
# @markdown Use `clean_caption` option to clean such as duplicate tags, `women` to `girl`, etc
clean_caption     = False #@param {type:"boolean"}
#@markdown Use the `recursive` option to process subfolders as well
recursive         = False #@param {type:"boolean"}
skip_existing     = True #@param {type: "boolean"}
bucket_no_upscale = True #@param {type: "boolean"}

metadata_config = {
    "_train_data_dir": train_data_dir,
    "_out_json": metadata_json,
    "recursive": recursive,
    "full_path": recursive,
    "clean_caption": clean_caption
}

bucketing_config = {
    "_train_data_dir": train_data_dir,
    "_in_json": metadata_json,
    "_out_json": bucketing_json,
    "_model_name_or_path": model_path,
    "recursive": recursive,
    "full_path": recursive,
    "flip_aug": flip_aug,
    "batch_size": 24,
    "max_data_loader_n_workers": 8,
    "max_resolution": f"{bucket_resolution}, {bucket_resolution}",
    "mixed_precision": mixed_precision,
    "skip_existing": skip_existing,
    "bucket_no_upscale": bucket_no_upscale
}

def generate_args(config):
    args = ""
    for k, v in config.items():
        if k.startswith("_"):
            args += f'"{v}" '
        elif isinstance(v, str):
            args += f'--{k}="{v}" '
        elif isinstance(v, bool) and v:
            args += f"--{k} "
        elif isinstance(v, float) and not isinstance(v, bool):
            args += f"--{k}={v} "
        elif isinstance(v, int) and not isinstance(v, bool):
            args += f"--{k}={v} "
    return args.strip()

merge_metadata_args = generate_args(metadata_config)
prepare_buckets_args = generate_args(bucketing_config)

merge_metadata_command = f"python merge_all_to_metadata.py {merge_metadata_args}"
prepare_buckets_command = f"python prepare_buckets_latents.py {prepare_buckets_args}"

os.chdir(finetune_dir)
!{merge_metadata_command}
time.sleep(1)
!{prepare_buckets_command}
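
For what it's worth, my rough understanding of what bucket_no_upscale does (a sketch only, not the actual sd-scripts implementation): the bucket is derived from each image's own size instead of a pre-made list, the image is only shrunk if it exceeds the max_resolution area budget, and both sides are rounded down to the bucket step, so nothing is ever scaled up.

import math

def no_upscale_bucket(img_w, img_h, max_resolution=1024, reso_steps=64):
    # Keep the image's own aspect ratio; shrink only if it exceeds the area budget.
    max_area = max_resolution * max_resolution
    scale = min(1.0, math.sqrt(max_area / (img_w * img_h)))  # never above 1.0: no upscaling
    w = int(img_w * scale) // reso_steps * reso_steps
    h = int(img_h * scale) // reso_steps * reso_steps
    return w, h

print(no_upscale_bucket(1024, 1024))  # -> (1024, 1024): left alone
print(no_upscale_bucket(4000, 3000))  # -> (1152, 832): shrunk to fit the 1024x1024 area budget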
