{ "architectures": [ "MoEGPTForCausalLM" ], "auto_map": { "AutoConfig": "robinfaro/GPT2-1B-base--configuration.MoEGPTConfig", "AutoModelForCausalLM": "robinfaro/GPT2-1B-base--modeling.MoEGPTForCausalLM" }, "batch_size": 16, "bias": false, "dropout": 0.0, "mlp_dim_exp_factor": 1.0, "model_type": "moegpt", "moe": false, "moe_aux_loss_factor": 0.01, "moe_num_experts": 6, "moe_num_experts_per_tok": 2, "moe_router_loss": "load_balancing_z_loss", "moe_routing": null, "moe_softmax_order": "softmax_topk", "moe_z_loss_factor": 1.0, "n_embd": 1600, "n_head": 25, "n_layer": 48, "sequence_length": 1024, "torch_dtype": "float32", "transformers_version": "4.51.0", "vocab_size": 50304 }