Update README.md
Browse files
README.md
CHANGED
@@ -71,6 +71,7 @@ class UpcycledModelMixin:
|
|
71 |
@classmethod
|
72 |
def upcycled_from(cls, source_model, config: UpcyclingConfig) -> Self:
|
73 |
upcycled_model_config = cls.config_class(**source_model.config.to_dict())
|
|
|
74 |
if hasattr(upcycled_model_config, "shared_expert_intermediate_size"):
|
75 |
upcycled_model_config.shared_expert_intermediate_size = source_model.config.intermediate_size
|
76 |
|
|
|
71 |
@classmethod
|
72 |
def upcycled_from(cls, source_model, config: UpcyclingConfig) -> Self:
|
73 |
upcycled_model_config = cls.config_class(**source_model.config.to_dict())
|
74 |
+
upcycled_model_config.moe_intermediate_size = upcycled_model_config.intermediate_size // config.partitions_from_mlp
|
75 |
if hasattr(upcycled_model_config, "shared_expert_intermediate_size"):
|
76 |
upcycled_model_config.shared_expert_intermediate_size = source_model.config.intermediate_size
|
77 |
|