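# old_tok/configs/tokenizer_config.yaml
# Web file-viewer header captured with the file (not part of the config):
# avatar "tennant", commit message "upload", commit af7c0ce,
# links "raw" / "history" / "blame", size 1.61 kB.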
---
# Configuration for paintmind's DiffusionTrainer; outputs go under
# ./output/tokenizer.
#
# NOTE(review): the indentation of this file had been lost, which left
# `target` / `params` (and `root`, `split`, `img_size`) duplicated at a single
# level. The nesting below is a reconstruction: every `target` is paired with
# the `params` that follows it, and `model`, `dataset` and `test_dataset` are
# assumed to be children of `trainer.params`. Confirm against the code that
# loads this file.
trainer:
  target: paintmind.engine.trainer.DiffusionTrainer
  params:
    num_epoch: 400
    valid_size: 64
    blr: 2.5e-5
    cosine_lr: true
    warmup_epochs: 100
    batch_size: 32
    num_workers: 16
    pin_memory: true
    grad_accum_steps: 1
    precision: 'bf16'
    max_grad_norm: 3.0
    enable_ema: true

    # Intervals for periodic actions; the unit is not visible in this file
    # (presumably training steps — TODO confirm).
    save_every: 10000
    sample_every: 5000
    fid_every: 50000

    result_folder: './output/tokenizer'
    # NOTE(review): `log_dit` may be a typo for `log_dir`. The key is left
    # unchanged because the consumer is not visible here — verify.
    log_dit: './output/tokenizer/logs'
    cfg: 3.0
    compile: true

    model:
      target: paintmind.stage1.diffuse_slot.DiffuseSlot
      params:
        encoder: 'vit_base_patch16'
        enc_img_size: 256
        enc_causal: true
        enc_use_mlp: false
        num_slots: 256
        slot_dim: 16
        norm_slots: true
        dit_mask_type: 'replace'
        cond_method: 'token'
        dit_model: 'DiT-XL-2'
        vae: 'xwen99/mar-vae-kl16'

        enable_nest: false
        enable_nest_after: 50
        nest_rho: 0.03
        nest_dist: 'uniform'
        nest_null_prob: 0
        nest_allow_zero: false

        use_repa: true
        repa_encoder: 'dinov2_vitb'
        repa_encoder_depth: 8
        repa_loss_weight: 1.0

        eval_fid: true
        fid_stats: 'fid_stats/adm_in256_stats.npz'
        # Kept as a quoted string, exactly as in the original.
        # NOTE(review): confirm the consumer expects a string, not an int.
        num_sampling_steps: '250'
        # Was the bare word `None`, which YAML reads as the string "None"
        # rather than a null value; `null` is the YAML spelling of "no value".
        # NOTE(review): confirm the model treats a null ckpt_path as "no
        # checkpoint".
        ckpt_path: null

    dataset:
      target: paintmind.utils.datasets.ImageNet
      params:
        root: ./dataset/imagenet/
        split: train
        img_size: 256

    test_dataset:
      target: paintmind.utils.datasets.ImageNet
      params:
        root: ./dataset/imagenet/
        split: val
        img_size: 256