gabrielmbmb committed on
Commit
d265440
·
verified ·
1 Parent(s): 8761fe4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -0
README.md CHANGED
@@ -71,6 +71,7 @@ class UpcycledModelMixin:
71
  @classmethod
72
  def upcycled_from(cls, source_model, config: UpcyclingConfig) -> Self:
73
  upcycled_model_config = cls.config_class(**source_model.config.to_dict())
 
74
  if hasattr(upcycled_model_config, "shared_expert_intermediate_size"):
75
  upcycled_model_config.shared_expert_intermediate_size = source_model.config.intermediate_size
76
 
 
71
  @classmethod
72
  def upcycled_from(cls, source_model, config: UpcyclingConfig) -> Self:
73
  upcycled_model_config = cls.config_class(**source_model.config.to_dict())
74
+ upcycled_model_config.moe_intermediate_size = upcycled_model_config.intermediate_size // config.partitions_from_mlp
75
  if hasattr(upcycled_model_config, "shared_expert_intermediate_size"):
76
  upcycled_model_config.shared_expert_intermediate_size = source_model.config.intermediate_size
77