from dataclasses import dataclass


@dataclass
class OpenPeerConfig:
    """Configuration class for OpenPeerLLM"""

    vocab_size: int = 50257  # GPT-2 vocabulary size
    hidden_size: int = 768  # Size of the hidden layers
    num_hidden_layers: int = 12  # Number of transformer layers
    num_attention_heads: int = 12  # Number of attention heads
    intermediate_size: int = 3072  # Size of the MLP intermediate layer
    max_position_embeddings: int = 1024  # Maximum sequence length
    layer_norm_eps: float = 1e-5  # Layer normalization epsilon
    hidden_dropout: float = 0.1  # Dropout probability for hidden layers
    attention_dropout: float = 0.1  # Dropout probability for attention layers

    def to_dict(self):
        """Convert the config to a dictionary"""
        return {
            "vocab_size": self.vocab_size,
            "hidden_size": self.hidden_size,
            "num_hidden_layers": self.num_hidden_layers,
            "num_attention_heads": self.num_attention_heads,
            "intermediate_size": self.intermediate_size,
            "max_position_embeddings": self.max_position_embeddings,
            "layer_norm_eps": self.layer_norm_eps,
            "hidden_dropout": self.hidden_dropout,
            "attention_dropout": self.attention_dropout,
            "model_type": "openpeer_llm",
            "architectures": ["OpenPeerLLM"],
        }

    @classmethod
    def from_dict(cls, config_dict):
        """Create a config from a dictionary"""
        return cls(
            vocab_size=config_dict.get("vocab_size", 50257),
            hidden_size=config_dict.get("hidden_size", 768),
            num_hidden_layers=config_dict.get("num_hidden_layers", 12),
            num_attention_heads=config_dict.get("num_attention_heads", 12),
            intermediate_size=config_dict.get("intermediate_size", 3072),
            max_position_embeddings=config_dict.get("max_position_embeddings", 1024),
            layer_norm_eps=config_dict.get("layer_norm_eps", 1e-5),
            hidden_dropout=config_dict.get("hidden_dropout", 0.1),
            attention_dropout=config_dict.get("attention_dropout", 0.1),
        )
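

# A minimal usage sketch (not part of the library API): round-trip the config
# through to_dict()/from_dict(), e.g. to persist it as a Hugging Face-style
# config.json. The filename "config.json" is illustrative, not a requirement.
if __name__ == "__main__":
    import json

    config = OpenPeerConfig(hidden_size=1024, num_attention_heads=16)

    # Serialize to JSON; to_dict() adds the "model_type" and "architectures"
    # metadata keys on top of the dataclass fields.
    with open("config.json", "w") as f:
        json.dump(config.to_dict(), f, indent=2)

    # Restore from JSON; from_dict() ignores the extra metadata keys and
    # falls back to the defaults for any field missing from the dict.
    with open("config.json") as f:
        restored = OpenPeerConfig.from_dict(json.load(f))

    assert restored.hidden_size == 1024
    assert restored.num_attention_heads == 16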