File size: 4,573 Bytes
c614b0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# LHM-500M
# Experiment identity: run type, RNG seed, and parent/child naming used for
# checkpoint and log directory layout.
experiment:
    type: lrm
    seed: 42
    parent: video_human_benchmark
    child: human-lrm-500M
model:
    # image encoder
    model_name: SapDinoLRMBHSD3_5
    encoder_type: dinov2_fusion
    encoder_model_name: "dinov2_vitl14_reg"
    encoder_feat_dim: 1024  # dinov2 ViT-L embedding size
    encoder_freeze: false

    # fine (high-res) encoder: Sapiens torchscript checkpoint
    fine_encoder_type: sapiens
    fine_encoder_model_name: "./pretrained_models/sapiens/pretrained/checkpoints/sapiens_1b/sapiens_1b_epoch_173_torchscript.pt2"  # sapiens pretrained model path
    fine_encoder_feat_dim: 1536  # sapiens-1b embedding size 1536
    fine_encoder_freeze: true

    use_face_id: true

    # points embeddings
    # num_pcl: 10240
    latent_query_points_type: "e2e_smplx_sub1"
    pcl_dim: 1024
    facesr: true

    transformer_type: "sd3_mm_bh_cond"  # multi-modal BH attention.
    transformer_heads: 16
    transformer_dim: 1024  # 16 * 64 = 1024
    transformer_layers: 5
    tf_grad_ckpt: true
    encoder_grad_ckpt: true

    # for gs renderer
    human_model_path: "./pretrained_models/human_model_files"
    smplx_subdivide_num: 1
    smplx_type: "smplx_2"
    gs_query_dim: 1024
    gs_use_rgb: true
    gs_sh: 3
    dense_sample_pts: 40000
    gs_mlp_network_config:
        n_neurons: 512
        n_hidden_layers: 2
        activation: silu
    # gs_xyz_offset_max_step: 0.05625  # 1.8 / 32
    # gs_clip_scaling: 0.2  # avoid too large Sphere
    gs_xyz_offset_max_step: 1.0  # 1.8 / 32
    gs_clip_scaling: [100, 0.01, 0.05, 3000]  # [start, start_v, end_v, end]
    expr_param_dim: 100
    shape_param_dim: 10

    fix_opacity: false
    fix_rotation: false
    cano_pose_type: 1  # 0 means exavatar-pose, 1 indicates REC-MV pose

# Dataset: two ClothVideo subsets (real + synthetic), each with its own
# train/val metadata JSON; sampled with equal rate 1.0.
dataset:
    subsets:
        -   name: video_human_flame
            root_dirs: "./train_data/ClothVideo"
            meta_path:
                train: "./train_data/ClothVideo/label/valid_id_with_img_list_clean_30W.json"
                val: "./train_data/ClothVideo/label/valid_id_with_img_list_val.json"
            sample_rate: 1.0
            use_flame: true
            src_head_size: 112
        -   name: video_human_flame_v2
            root_dirs: "./train_data/ClothVideo"
            meta_path:
                train: "./train_data/ClothVideo/label/valid_synthetic_data_train.json"
                val: "./train_data/ClothVideo/label/valid_synthetic_data_val.json"
            sample_rate: 1.0
            use_flame: true
            src_head_size: 112
    sample_side_views: 5
    source_image_res: 1024
    src_head_size: 112
    render_image:
        low: 512
        high: 512
        region: null
    num_train_workers: 4
    multiply: 16  # dino features
    num_val_workers: 2
    pin_mem: true
    repeat_num: 1

train:
    mixed_precision: bf16  # REPLACE THIS BASED ON GPU TYPE
    find_unused_parameters: false
    loss_func:
        pixel_loss: l1  # L1 or MSE
        # per-body-part weights for the ball (asap) loss
        ball_loss:
            type: heuristic  # heuristic ball_loss
            group:
                head: 1.
                lower_body: 100.
                upper_body: 1000.
                hands: 10000.
        # per-body-part weights for the offset (acap) loss
        offset_loss:
            type: classical  # classical offset_loss
            group:
                head: 1.
                lower_body: 1.
                upper_body: 100.
                hands: 1000.
    loss:
        pixel_weight: 0.0
        masked_pixel_weight: 1.0
        masked_head_weight: 0.0
        perceptual_weight: 1.0
        # tv_weight: 5e-4
        tv_weight: -1  # negative presumably disables TV loss — TODO confirm in loss code
        mask_weight: 1.0
        face_id_weight: 0.05
        asap_weight: 10.0  # ball loss
        acap_weight: 1000.0  # offset loss
    optim:
        lr: 4e-5
        weight_decay: 0.05
        beta1: 0.9
        beta2: 0.95
        clip_grad_norm: 0.1  # diffusion model
    scheduler:
        type: cosine
        warmup_real_iters: 0
    batch_size: 4  # REPLACE THIS (PER GPU)
    accum_steps: 1  # REPLACE THIS
    epochs: 60  # REPLACE THIS
    debug_global_steps: null

# Validation settings.
val:
    batch_size: 2
    global_step_period: 1000  # presumably validate every N global steps — confirm against trainer
    debug_batches: 10

# Checkpointing.
saver:
    auto_resume: true
    # NOTE(review): plain `None` is parsed by YAML as the *string* "None",
    # not a null — the loader presumably special-cases that string; write
    # `null` instead if a real YAML null is intended. TODO confirm.
    load_model: None
    checkpoint_root: ./exps/checkpoints
    checkpoint_global_steps: 1000
    checkpoint_keep_level: 60

# Logging and experiment tracking.
logger:
    stream_level: WARNING  # console handler level
    log_level: INFO  # file log level
    log_root: ./exps/logs
    tracker_root: ./exps/trackers
    enable_profiler: false
    trackers:
        - tensorboard
    image_monitor:
        train_global_steps: 100  # presumably log sample images every N steps — confirm
        samples_per_log: 4

# torch.compile / dynamo options — disabled for this run.
compile:
    suppress_errors: true
    print_specializations: true
    disable: true