{ "architectures": [ "MoEGPTForCausalLM" ], "auto_map": { "AutoConfig": "robinfaro/GPT2-1B-base--configuration.MoEGPTConfig", "AutoModelForCausalLM": "robinfaro/GPT2-1B-base--modeling.MoEGPTForCausalLM" }, "batch_size": 16, "bias": false, "dropout": 0.0, "mlp_dim_exp_factor": 1.0, "model_type": "moegpt", "moe": false, "moe_aux_loss_factor": 0.01, "moe_num_experts": 6, "moe_num_experts_per_tok": 2, "moe_router_loss": "load_balancing_z_loss", "moe_routing": null, "moe_softmax_order": "softmax_topk", "moe_z_loss_factor": 1.0, "n_embd": 1600, "n_head": 25, "n_layer": 48, "sequence_length": 1024, "torch_dtype": "float32", "transformers_version": "4.51.0", "vocab_size": 50304 }