set chat_template in datasets config automatically (#1664)
Browse files
* set chat_template in datasets config automatically
* dynamic chat_template, not just chatml
src/axolotl/utils/config/__init__.py
CHANGED
@@ -187,19 +187,22 @@ def normalize_cfg_datasets(cfg):
|
|
187 |
helpers for mapping chat_template to various dataset configurations as necessary
|
188 |
"""
|
189 |
|
190 |
-
if cfg.chat_template
|
191 |
if cfg.datasets:
|
192 |
for idx, ds_cfg in enumerate(cfg.datasets):
|
193 |
if ds_cfg.type == "sharegpt" and not ds_cfg.conversation:
|
194 |
LOG.info(
|
195 |
-
f"updating dataset {ds_cfg.path} with `conversation:
|
196 |
)
|
197 |
-
cfg.datasets[idx].conversation =
|
198 |
-
if
|
|
|
|
|
|
|
199 |
LOG.info(
|
200 |
-
f"updating dataset {ds_cfg.path} with `chat_template:
|
201 |
)
|
202 |
-
cfg.datasets[idx].chat_template =
|
203 |
|
204 |
|
205 |
def validate_config(cfg: DictDefault, capabilities: Optional[dict] = None):
|
|
|
187 |
helpers for mapping chat_template to various dataset configurations as necessary
|
188 |
"""
|
189 |
|
190 |
+
if cfg.chat_template:
|
191 |
if cfg.datasets:
|
192 |
for idx, ds_cfg in enumerate(cfg.datasets):
|
193 |
if ds_cfg.type == "sharegpt" and not ds_cfg.conversation:
|
194 |
LOG.info(
|
195 |
+
f"updating dataset {ds_cfg.path} with `conversation: {cfg.chat_template}` to match your chat_template"
|
196 |
)
|
197 |
+
cfg.datasets[idx].conversation = cfg.chat_template
|
198 |
+
if (
|
199 |
+
ds_cfg.type in ["orpo.chat_template", "chat_template"]
|
200 |
+
and not ds_cfg.chat_template
|
201 |
+
):
|
202 |
LOG.info(
|
203 |
+
f"updating dataset {ds_cfg.path} with `chat_template: {cfg.chat_template}` to match your chat_template"
|
204 |
)
|
205 |
+
cfg.datasets[idx].chat_template = cfg.chat_template
|
206 |
|
207 |
|
208 |
def validate_config(cfg: DictDefault, capabilities: Optional[dict] = None):
|