File size: 4,573 Bytes
c614b0f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
# LHM-500M
# Experiment identity: run type, RNG seed, and a parent/child name pair.
# NOTE(review): parent/child presumably select the run's output sub-folders;
# confirm against the code that reads this section.
experiment:
  type: "lrm"
  seed: 42
  parent: "video_human_benchmark"
  child: "human-lrm-500M"
# Model definition: image encoders, latent query points, transformer, and the
# gs renderer settings. Booleans are written in canonical lowercase form.
model:
  # image encoder
  model_name: SapDinoLRMBHSD3_5
  encoder_type: dinov2_fusion
  encoder_model_name: "dinov2_vitl14_reg"
  encoder_feat_dim: 1024  # dinov2 embedding size 1024
  encoder_freeze: false

  # fine encoder
  fine_encoder_type: sapiens
  # sapiens pretrained model path
  fine_encoder_model_name: "./pretrained_models/sapiens/pretrained/checkpoints/sapiens_1b/sapiens_1b_epoch_173_torchscript.pt2"
  fine_encoder_feat_dim: 1536  # sapiens embedding size
  fine_encoder_freeze: true

  use_face_id: true

  # points embeddings
  # num_pcl: 10240
  latent_query_points_type: "e2e_smplx_sub1"
  pcl_dim: 1024

  facesr: true

  # NOTE(review): the trailing numbers on the next three keys (30, 1920, 30)
  # presumably record the values of a larger variant — confirm.
  transformer_type: "sd3_mm_bh_cond"  # multi-modal BH attention.
  transformer_heads: 16  # 30
  transformer_dim: 1024  # 30 * 64=1920
  transformer_layers: 5  # 30
  tf_grad_ckpt: true
  encoder_grad_ckpt: true

  # for gs renderer
  human_model_path: "./pretrained_models/human_model_files"
  smplx_subdivide_num: 1
  smplx_type: "smplx_2"
  gs_query_dim: 1024
  gs_use_rgb: true
  gs_sh: 3
  # NOTE(review): the earlier inline note on this key read "4,000", which does
  # not match the value — confirm that 40000 is intended.
  dense_sample_pts: 40000
  gs_mlp_network_config:
    n_neurons: 512
    n_hidden_layers: 2
    activation: silu
  # Earlier settings, kept for reference:
  # gs_xyz_offset_max_step: 0.05625  # 1.8 / 32
  # gs_clip_scaling: 0.2  # avoid too large Sphere
  gs_xyz_offset_max_step: 1.0
  gs_clip_scaling: [100, 0.01, 0.05, 3000]  # [start, start_v, end_v, end]
  expr_param_dim: 100
  shape_param_dim: 10

  fix_opacity: false
  fix_rotation: false
  cano_pose_type: 1  # 0 means exavatar-pose, 1 indicates REC-MV pose
# Data sources and loader settings. `subsets` lists the individual sources;
# the keys after it apply at the dataset level.
dataset:
  subsets:
    - name: video_human_flame
      root_dirs: "./train_data/ClothVideo"
      meta_path:
        train: "./train_data/ClothVideo/label/valid_id_with_img_list_clean_30W.json"
        val: "./train_data/ClothVideo/label/valid_id_with_img_list_val.json"
      sample_rate: 1.0
      use_flame: true
      src_head_size: 112
    - name: video_human_flame_v2
      root_dirs: "./train_data/ClothVideo"
      meta_path:
        train: "./train_data/ClothVideo/label/valid_synthetic_data_train.json"
        val: "./train_data/ClothVideo/label/valid_synthetic_data_val.json"
      sample_rate: 1.0
      use_flame: true
      src_head_size: 112
  sample_side_views: 5
  source_image_res: 1024
  src_head_size: 112
  # Rendered image size range; low == high here, so the size is fixed at 512.
  render_image:
    low: 512
    high: 512
    region: null
  num_train_workers: 4
  multiply: 16  # dino features
  num_val_workers: 2
  pin_mem: true
  repeat_num: 1
# Training settings: precision, loss definitions and weights, optimizer,
# LR scheduler, and batch/epoch sizing.
train:
  mixed_precision: bf16  # REPLACE THIS BASED ON GPU TYPE
  find_unused_parameters: false
  loss_func:
    pixel_loss: l1  # L1 or MSE
    # Per-body-part weights for the ball loss.
    ball_loss:
      type: heuristic  # heuristic ball_loss
      group:
        head: 1.0
        lower_body: 100.0
        upper_body: 1000.0
        hands: 10000.0
    # Per-body-part weights for the offset loss.
    offset_loss:
      type: classical
      group:
        head: 1.0
        lower_body: 1.0
        upper_body: 100.0
        hands: 1000.0
  loss:
    pixel_weight: 0.0
    masked_pixel_weight: 1.0
    masked_head_weight: 0.0
    perceptual_weight: 1.0
    # NOTE(review): -1 presumably disables this term; the earlier value is
    # kept below for reference — confirm against the consumer.
    # tv_weight: 5e-4
    tv_weight: -1
    mask_weight: 1.0
    face_id_weight: 0.05
    asap_weight: 10.0  # ball loss
    acap_weight: 1000.0  # offset loss
  optim:
    # Written with an explicit decimal point so that YAML 1.1 loaders
    # (e.g. PyYAML) resolve it as a float rather than the string "4e-5".
    lr: 4.0e-5
    weight_decay: 0.05
    beta1: 0.9
    beta2: 0.95
    clip_grad_norm: 0.1  # diffusion model
  scheduler:
    type: cosine
    warmup_real_iters: 0
  batch_size: 4  # REPLACE THIS (PER GPU)
  accum_steps: 1  # REPLACE THIS
  epochs: 60  # REPLACE THIS
  debug_global_steps: null
# Validation settings.
# NOTE(review): global_step_period is presumably the number of training steps
# between validation runs, and debug_batches a cap on batches in debug mode —
# confirm against the consumer.
val:
  batch_size: 2
  global_step_period: 1000
  debug_batches: 10
# Checkpoint saving and resuming.
saver:
  auto_resume: true
  # `null` (not the Python-style `None`, which YAML reads as the non-empty
  # string "None") so that "no model to load" is an actual null value, matching
  # the other null settings in this file.
  load_model: null
  checkpoint_root: ./exps/checkpoints
  checkpoint_global_steps: 1000
  checkpoint_keep_level: 60
# Logging: verbosity levels, output directories, experiment trackers, and how
# often sample images are logged.
logger:
  stream_level: "WARNING"
  log_level: "INFO"
  log_root: "./exps/logs"
  tracker_root: "./exps/trackers"
  enable_profiler: false
  trackers:
    - "tensorboard"
  # NOTE(review): presumably logs `samples_per_log` images every
  # `train_global_steps` training steps — confirm against the consumer.
  image_monitor:
    train_global_steps: 100
    samples_per_log: 4
# Compilation switches.
# NOTE(review): the key names suggest these are forwarded to the framework's
# graph-compilation settings; with `disable: true` compilation is presumably
# turned off entirely — confirm against the consumer.
compile:
  suppress_errors: true
  print_specializations: true
  disable: true
|