-
Notifications
You must be signed in to change notification settings - Fork 14
/
base.toml
212 lines (200 loc) · 8.2 KB
/
base.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# General Configuration
# Top-level settings: anime name, source/destination directories, the range of
# stages to run, logging, and metadata/intermediate-file handling.
[general]
# The anime name used for downloading anime from nyaa.si, and potentially for
# downloading images from Danbooru, unless --anime_name_booru is given
anime_name = "my_favorite_anime"
# Directory containing source files
src_dir = "path/to/src_dir"
# Directory to save output files
dst_dir = "path/to/dst_dir"
# Extra path component to add between dst_dir/[training|intermediate] and image_type.
# The main difference between extra_path_component and image_type is that the
# latter can also be used in captions and considered as embeddings for pivotal tuning
extra_path_component = ""
# Stage number or alias to start from
start_stage = 1
# Stage number or alias to end at
end_stage = 7
# Directory to save logs. Set to None or none to disable.
# NOTE(review): TOML has no null value, so this presumably means the string
# "None" or "none" -- confirm against the config loader.
log_dir = "logs"
# Prefix for log files; defaults to --anime_name if provided, otherwise 'logfile'.
# NOTE(review): the empty inline table {} appears to serve as the "unset"
# placeholder here -- confirm against the config loader.
log_prefix = {}
# Overwrite path in metadata. It has an effect at stage 2 and stages 4-7.
# Using it when starting at stage 4 will prevent character information from
# being written to the original images.
# Should never be used in general
overwrite_path = false
# NOTE(review): the comment originally placed here read "Pipeline type that is
# used to construct dataset", which does not describe the key below; a
# pipeline_type key may be missing from this file -- confirm.
# Presumably controls whether intermediate results are removed -- confirm.
remove_intermediate = false
# Metadata Loading and Saving
# Selects which grabber files and auxiliary attributes are loaded and saved.
[metadata_handling]
# Extension of the grabber information files to load. Attributes from these
# files overwrite those loaded from --load_aux.
# NOTE(review): {} looks like the "unset" placeholder; the commented line below
# shows an example value -- confirm against the config loader.
# load_grabber_ext = ".tags"
load_grabber_ext = {}
# List of auxiliary attributes to load (example value in the commented line below)
# load_aux = ["processed_tags", "characters"]
load_aux = []
# List of auxiliary attributes to save (example value in the commented line below)
# save_aux = ["processed_tags", "characters"]
save_aux = []
# Duplicate Detection Configuration
# Settings for detecting (and, unless disabled, removing) similar images.
[duplicate_detection]
# Do not remove similar images
no_remove_similar = false
# Model used for duplicate detection
detect_duplicate_model = "mobilenetv3_large_100"
# Batch size for embedding computation used in duplicate detection
detect_duplicate_batch_size = 16
# Cosine similarity threshold for image duplicate detection.
# NOTE(review): presumably images whose similarity exceeds this value are
# treated as duplicates -- confirm.
similar_thresh = 0.95
# Character Cropping Configuration
# Settings for cropping characters out of images.
[character_cropping]
# Minimum size for character cropping
min_crop_size = 320
# Crop only images with head for character identification
crop_with_head = true
# Crop only images with face for character identification
crop_with_face = false
# The detection model level being used. The 'n' model runs faster with smaller system overhead
detect_level = "n"
# Use 3-stage crop to get half-body and head crops.
# This is slow and should only be run once for a set of images. It can be used
# at either stage 2 or stage 4.
# Set to {} or comment out this line to disable
use_3stage_crop = {}
# Character Clustering/Classification Configuration
# Settings for clustering images by character and matching them to references.
[character_classification]
# Directory containing reference character images
character_ref_dir = "path/to/reference/character/images"
# The number of additional reference images to add to each character from the classification result
n_add_to_ref_per_character = 0
# Whether to disable matching character labels for noise images
no_extract_from_noise = false
# Whether to disable final filtering for character consistency
no_filter_characters = false
# Whether to keep unnamed clusters when reference images are provided or when characters are available in metadata
keep_unnamed_clusters = true
# Cluster merge threshold in character clustering
cluster_merge_threshold = 0.85
# Minimum cluster samples in character clustering
cluster_min_samples = 5
# The relative threshold for determining whether images belong to the same cluster for noise extraction and filtering
same_threshold_rel = 0.6
# The absolute threshold for determining whether images belong to the same cluster for noise extraction and filtering
same_threshold_abs = 20
# Dataset Construction Configuration
# Settings for which images go into the dataset and how they are resized and saved.
[dataset_construction]
# Overwrite the existing trigger word CSV
overwrite_emb_init_info = false
# Do not include cropped images in dataset
no_cropped_in_dataset = false
# Do not include original images in dataset
no_original_in_dataset = false
# Do not perform image resizing
no_resize = false
# Maximum image size; the shorter edge is aligned to this value
max_size = 768
# Dataset image extension
image_save_ext = ".webp"
# Filter repeated images again at this step
filter_again = true
# Tagging Configuration
# Settings for the tagger: method, threshold, and whether to redo existing tags.
[tagging]
# Whether to overwrite existing tags
overwrite_tags = false
# Method used for tagging
tagging_method = "wd14_convnextv2"
# Threshold for tagging.
# NOTE(review): presumably tags scoring below this value are discarded -- confirm.
tag_threshold = 0.35
# General Tag Processing Configuration
# Settings for sorting, limiting, filtering, and pruning tags.
[tag_processing]
# Mode used to sort the tags
sort_mode = "score"
# Whether to append dropped character tags to the caption
append_dropped_character_tags = false
# Maximum number of tags to include in the caption
max_tag_number = 30
# Path to the file containing blacklisted tags
blacklist_tags_file = "configs/tag_filtering/blacklist_tags.txt"
# Path to the file containing overlap tag information
overlap_tags_file = "configs/tag_filtering/overlap_tags.json"
# Path to the file containing character tag information
character_tags_file = "configs/tag_filtering/character_tags.json"
# Process tags from original tags instead of processed tags
process_from_original_tags = true
# Mode used to prune tags
prune_mode = "character_core"
# Core Tag Processing Configuration
# Settings for computing core tags and using them for tag dropping and
# embedding initialization.
[core_tag_processing]
# Number of directory levels to go up from the captioned directory when computing core tags.
# Set to 1 here so that core tags are computed a single time for all the image types
compute_core_tag_up_levels = 1
# Minimum frequency for a tag to be considered a core tag
core_frequency_thresh = 0.3
# Use the existing core tag JSON instead of recomputing core tags
use_existing_core_tag_file = false
# The difficulty level up to which tags should be dropped
drop_difficulty = 2
# Whether to drop all core tags or not
drop_all_core = false
# The difficulty level from which tags should be used for embedding initialization
emb_min_difficulty = 1
# The difficulty level up to which tags should be used for embedding initialization
emb_max_difficulty = 2
# Whether to use all core tags for embedding initialization
emb_init_all_core = false
# Whether to append dropped character tags to the wildcard
append_dropped_character_tags_wildcard = false
# Captioning Separation Configuration
# Separator strings used when assembling captions.
[captioning_separation]
# For separating items of a single field of caption
caption_inner_sep = ", "
# For separating different fields of caption
caption_outer_sep = ", "
# For separating characters
character_sep = ", "
# For separating items of a single field of character
character_inner_sep = " "
# For separating different fields of character
character_outer_sep = ", "
# Separator for keep tokens for Kohya trainer.
# NOTE(review): {} looks like the "unset" placeholder -- confirm against the config loader.
keep_tokens_sep = {}
# The caption component before which --keep_tokens_sep is placed
keep_tokens_before = "tags"
# Captioning Components Configuration
# Ordering of caption components and the probability of including each one.
# NOTE(review): the probabilities below are written as integers (0 or 1);
# presumably the loader treats them as 0.0 / 1.0 -- confirm.
[captioning_components]
# Order of the caption components
caption_ordering = ["npeople", "character", "copyright", "image_type", "artist", "rating", "crop_info", "tags"]
# Probability to include number of people in captions
use_npeople_prob = 0
# Probability to include character info in captions
use_character_prob = 1
# Probability to include copyright info in captions
use_copyright_prob = 0
# Probability to include image type info in captions
use_image_type_prob = 1
# Probability to include artist info in captions
use_artist_prob = 0
# Probability to include rating info in captions
use_rating_prob = 1
# Probability to include crop info in captions
use_crop_info_prob = 1
# Probability to include tag info in captions
use_tags_prob = 1
# Folder Organization Configuration
# Settings for the rearrange stage, which organizes images into a directory hierarchy.
[folder_organization]
# Number of directory levels to go up from the captioned directory when setting the source directory for the rearrange stage
rearrange_up_levels = 0
# Description of the concept balancing directory hierarchy
arrange_format = "n_characters/character"
# If there are more than X characters, put X+ instead (X being this value)
max_character_number = 6
# Put "others" instead of the character name if the number of images of the character combination is smaller than this number
min_images_per_combination = 10
# Balancing Configuration
# Settings for the compute multiply stage, which assigns a multiply to each image.
[balancing]
# Number of directory levels to go up from the rearranged directory when setting the source directory for the compute multiply stage
compute_multiply_up_levels = 1
# Minimum multiply of each image
min_multiply = 1
# Maximum multiply of each image
max_multiply = 100
# If provided, use this CSV file to modify weights
weight_csv = "configs/csv_examples/default_weighting.csv"