{ "architectures": [ "MoEGPTForCausalLM" ], "auto_map": { "AutoConfig": "configuration.MoEGPTConfig", "AutoModelForCausalLM": "modeling.MoEGPTForCausalLM", "AutoTokenizer": "GPT2TokenizerFast" }, "batch_size": 16, "bias": false, "dropout": 0.0, "mlp_dim_exp_factor": 1.0, "model_type": "moegpt", "moe": true, "moe_aux_loss_factor": 0.01, "moe_num_experts": 6, "moe_num_experts_per_tok": 2, "moe_router_loss": "load_balancing_z_loss", "moe_routing": "standard_gating", "moe_softmax_order": "softmax_topk", "moe_z_loss_factor": 1.0, "n_embd": 1152, "n_head": 16, "n_layer": 24, "sequence_length": 1024, "torch_dtype": "float32", "transformers_version": "4.51.3", "vocab_size": 50304 }