Duplicate from sn56/35f01a57-b51b-4bdd-a301-acb8ff427029
Co-authored-by: Roy Joy <RoyJoy@users.noreply.huggingface.co>
- .gitattributes +35 -0
- README.md +192 -0
- adapter_config.json +34 -0
- adapter_model.bin +3 -0
- adapter_model.safetensors +3 -0
- config.json +31 -0
- last-checkpoint/README.md +202 -0
- last-checkpoint/adapter_config.json +34 -0
- last-checkpoint/adapter_model.safetensors +3 -0
- last-checkpoint/optimizer.pt +3 -0
- last-checkpoint/rng_state_0.pth +3 -0
- last-checkpoint/rng_state_1.pth +3 -0
- last-checkpoint/rng_state_2.pth +3 -0
- last-checkpoint/rng_state_3.pth +3 -0
- last-checkpoint/scheduler.pt +3 -0
- last-checkpoint/special_tokens_map.json +30 -0
- last-checkpoint/tokenizer.json +0 -0
- last-checkpoint/tokenizer.model +3 -0
- last-checkpoint/tokenizer_config.json +43 -0
- last-checkpoint/trainer_state.json +782 -0
- last-checkpoint/training_args.bin +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +43 -0
- training_args.bin +3 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,192 @@
---
library_name: peft
license: apache-2.0
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
tags:
- axolotl
- generated_from_trainer
model-index:
- name: 35f01a57-b51b-4bdd-a301-acb8ff427029
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.4.1`
```yaml
adapter: lora
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
bf16: auto
chat_template: llama3
cosine_min_lr_ratio: 0.1
data_processes: 4
dataset_prepared_path: null
datasets:
- data_files:
  - 1c76abd530c8890a_train_data.json
  ds_type: json
  format: custom
  path: /workspace/input_data/1c76abd530c8890a_train_data.json
  type:
    field_instruction: instruction
    field_output: output
    format: '{instruction}'
    no_input_format: '{instruction}'
    system_format: '{system}'
    system_prompt: ''
debug: null
deepspeed: null
device_map:
  lm_head: 3
  model.embed_tokens: 0
  model.layers.0: 0
  model.layers.1: 0
  model.layers.10: 3
  model.layers.11: 3
  model.layers.2: 0
  model.layers.3: 1
  model.layers.4: 1
  model.layers.5: 1
  model.layers.6: 2
  model.layers.7: 2
  model.layers.8: 2
  model.layers.9: 3
  model.norm: 3
do_eval: true
early_stopping_patience: 1
eval_batch_size: 1
eval_sample_packing: false
eval_steps: 25
evaluation_strategy: steps
flash_attention: false
fp16: null
fsdp: null
fsdp_config: null
gradient_accumulation_steps: 32
gradient_checkpointing: true
group_by_length: true
hub_model_id: sn56/35f01a57-b51b-4bdd-a301-acb8ff427029
hub_strategy: checkpoint
hub_token: null
learning_rate: 0.0001
load_in_4bit: false
load_in_8bit: false
local_rank: null
logging_steps: 1
lora_alpha: 64
lora_dropout: 0.05
lora_fan_in_fan_out: null
lora_model_dir: null
lora_r: 32
lora_target_linear: true
lora_target_modules:
- q_proj
- v_proj
lr_scheduler: cosine
max_grad_norm: 0.3
max_memory:
  0: 60GB
  1: 70GB
  2: 70GB
  3: 70GB
  cpu: 96GB
max_steps: 100
micro_batch_size: 1
mixed_precision: bf16
mlflow_experiment_name: /tmp/1c76abd530c8890a_train_data.json
model_type: AutoModelForCausalLM
num_epochs: 3
optim_args:
  adam_beta1: 0.9
  adam_beta2: 0.95
  adam_epsilon: 1e-5
optimizer: adamw_torch
output_dir: miner_id_24
pad_to_sequence_len: true
resume_from_checkpoint: null
s2_attention: null
sample_packing: false
save_steps: 25
save_strategy: steps
sequence_len: 2048
strict: false
tf32: false
tokenizer_type: AutoTokenizer
torch_compile: false
torch_dtype: bfloat16
train_on_inputs: false
trust_remote_code: true
use_cache: false
val_set_size: 50
wandb_entity: null
wandb_mode: online
wandb_name: 35f01a57-b51b-4bdd-a301-acb8ff427029
wandb_project: Public_TuningSN
wandb_runid: 35f01a57-b51b-4bdd-a301-acb8ff427029
warmup_ratio: 0.05
weight_decay: 0.01
xformers_attention: null

```

</details><br>

# 35f01a57-b51b-4bdd-a301-acb8ff427029

This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the None dataset.
It achieves the following results on the evaluation set:
- Loss: 1.2965

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.0001
- train_batch_size: 1
- eval_batch_size: 1
- seed: 42
- distributed_type: multi-GPU
- num_devices: 4
- gradient_accumulation_steps: 32
- total_train_batch_size: 128
- total_eval_batch_size: 4
- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=adam_beta1=0.9,adam_beta2=0.95,adam_epsilon=1e-5
- lr_scheduler_type: cosine
- lr_scheduler_warmup_steps: 5
- training_steps: 100

### Training results

| Training Loss | Epoch  | Step | Validation Loss |
|:-------------:|:------:|:----:|:---------------:|
| 1.1205        | 0.0025 | 1    | 1.5502          |
| 2.3234        | 0.0621 | 25   | 1.3512          |
| 2.1245        | 0.1242 | 50   | 1.3093          |
| 2.1532        | 0.1864 | 75   | 1.3022          |
| 2.0062        | 0.2485 | 100  | 1.2965          |

### Framework versions

- PEFT 0.13.2
- Transformers 4.46.0
- Pytorch 2.5.0+cu124
- Datasets 3.0.1
- Tokenizers 0.20.1
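The auto-generated card above stops at the framework versions and does not show how to load the adapter. Below is a minimal, illustrative sketch (not part of the original card), assuming the `transformers`/`peft` versions listed above; the adapter id shown is the hub id from the axolotl config, so substitute the id of this duplicated repo if loading from here.

```python
# Illustrative sketch only: load the TinyLlama base model and attach this LoRA adapter.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_id = "sn56/35f01a57-b51b-4bdd-a301-acb8ff427029"  # assumed: hub_model_id from the config above

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(base_model, adapter_id)  # wraps the base model with the adapter weights

prompt = "Briefly explain what a LoRA adapter is."
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```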
adapter_config.json
ADDED
@@ -0,0 +1,34 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
  "bias": "none",
  "fan_in_fan_out": null,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 64,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 32,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "k_proj",
    "v_proj",
    "down_proj",
    "o_proj",
    "up_proj",
    "q_proj",
    "gate_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
  "use_rslora": false
}
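For orientation, the adapter_config.json above corresponds roughly to the following PEFT `LoraConfig`. This is an illustrative reconstruction, not code taken from the training run; PEFT writes the JSON itself when the adapter is saved.

```python
# Rough LoraConfig equivalent of the adapter_config.json above (illustrative only).
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,                   # "r": 32
    lora_alpha=64,          # "lora_alpha": 64
    lora_dropout=0.05,      # "lora_dropout": 0.05
    bias="none",            # "bias": "none"
    task_type="CAUSAL_LM",  # "task_type": "CAUSAL_LM"
    target_modules=["k_proj", "v_proj", "down_proj", "o_proj", "up_proj", "q_proj", "gate_proj"],
)
```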
adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e2cf11d22a7a7150cfad40e6f9b30f7f08a0cd49c074de6cc899599aeacd33a
size 101036698
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:07b0e909cdfe7d161b6f5bf72fc7a94d1aa6aa8c181c34453e6951107a9cd39f
size 100966336
config.json
ADDED
@@ -0,0 +1,31 @@
{
  "_attn_implementation_autoset": true,
  "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.0",
  "use_cache": false,
  "vocab_size": 32000
}
last-checkpoint/README.md
ADDED
@@ -0,0 +1,202 @@
---
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
library_name: peft
---

# Model Card for Model ID

<!-- Provide a quick summary of what the model is/does. -->

## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->

- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]

### Model Sources [optional]

<!-- Provide the basic links for the model. -->

- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

[More Information Needed]

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

[More Information Needed]

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]

### Recommendations

<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->

Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.

## How to Get Started with the Model

Use the code below to get started with the model.

[More Information Needed]

## Training Details

### Training Data

<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->

[More Information Needed]

### Training Procedure

<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->

#### Preprocessing [optional]

[More Information Needed]

#### Training Hyperparameters

- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->

#### Speeds, Sizes, Times [optional]

<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->

[More Information Needed]

## Evaluation

<!-- This section describes the evaluation protocols and provides the results. -->

### Testing Data, Factors & Metrics

#### Testing Data

<!-- This should link to a Dataset Card if possible. -->

[More Information Needed]

#### Factors

<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->

[More Information Needed]

#### Metrics

<!-- These are the evaluation metrics being used, ideally with a description of why. -->

[More Information Needed]

### Results

[More Information Needed]

#### Summary

## Model Examination [optional]

<!-- Relevant interpretability work for the model goes here -->

[More Information Needed]

## Environmental Impact

<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->

Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).

- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]

## Technical Specifications [optional]

### Model Architecture and Objective

[More Information Needed]

### Compute Infrastructure

[More Information Needed]

#### Hardware

[More Information Needed]

#### Software

[More Information Needed]

## Citation [optional]

<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->

**BibTeX:**

[More Information Needed]

**APA:**

[More Information Needed]

## Glossary [optional]

<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->

[More Information Needed]

## More Information [optional]

[More Information Needed]

## Model Card Authors [optional]

[More Information Needed]

## Model Card Contact

[More Information Needed]

### Framework versions

- PEFT 0.13.2
last-checkpoint/adapter_config.json
ADDED
@@ -0,0 +1,34 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
  "bias": "none",
  "fan_in_fan_out": null,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 64,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 32,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "k_proj",
    "v_proj",
    "down_proj",
    "o_proj",
    "up_proj",
    "q_proj",
    "gate_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
  "use_rslora": false
}
last-checkpoint/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:07b0e909cdfe7d161b6f5bf72fc7a94d1aa6aa8c181c34453e6951107a9cd39f
size 100966336
last-checkpoint/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8456a8db082fa56b574b57ba41eaf2a8bc24d8a9359f4c91bd7a09a706af4ef1
size 202110330
last-checkpoint/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50c0ae168e2ac6423fc22e5c53b5afe9cd2a546fa9508290240454dc5b510159
size 15024
last-checkpoint/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fa227bfc5d392392afa2a54851b7dcbf15cf8ba848c20449a4aec2d922256866
size 15024
last-checkpoint/rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b5287b38cd630aab26ddfbc0b0a2e825c95832185fbe17db2cc7cd867181b6dc
size 15024
last-checkpoint/rng_state_3.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4494aa87bcf898f5c06998891d4aee323c55900b54a38390c1b9b9b9777cc078
size 15024
last-checkpoint/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:051dee7dfbeecb34b46e8409ffafec324501f465585234624669bc8c9e863ae4
size 1064
last-checkpoint/special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
last-checkpoint/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
last-checkpoint/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
size 499723
last-checkpoint/tokenizer_config.json
ADDED
@@ -0,0 +1,43 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": null,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": false,
  "model_max_length": 2048,
  "pad_token": "</s>",
  "padding_side": "right",
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}
last-checkpoint/trainer_state.json
ADDED
@@ -0,0 +1,782 @@
{
  "best_metric": 1.296454668045044,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.24848578971890045,
  "eval_steps": 25,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0024848578971890046, "grad_norm": 0.19135846197605133, "learning_rate": 2e-05, "loss": 1.1205, "step": 1},
    {"epoch": 0.0024848578971890046, "eval_loss": 1.5502378940582275, "eval_runtime": 1.1969, "eval_samples_per_second": 41.775, "eval_steps_per_second": 10.861, "step": 1},
    {"epoch": 0.004969715794378009, "grad_norm": 0.20889164507389069, "learning_rate": 4e-05, "loss": 1.1432, "step": 2},
    {"epoch": 0.007454573691567014, "grad_norm": 0.23940329253673553, "learning_rate": 6e-05, "loss": 1.1905, "step": 3},
    {"epoch": 0.009939431588756018, "grad_norm": 0.270460307598114, "learning_rate": 8e-05, "loss": 1.2135, "step": 4},
    {"epoch": 0.012424289485945022, "grad_norm": 0.2930460274219513, "learning_rate": 0.0001, "loss": 1.2384, "step": 5},
    {"epoch": 0.014909147383134027, "grad_norm": 0.33367636799812317, "learning_rate": 9.997539658034168e-05, "loss": 1.3348, "step": 6},
    {"epoch": 0.017394005280323033, "grad_norm": 0.39504995942115784, "learning_rate": 9.990161322484486e-05, "loss": 1.3978, "step": 7},
    {"epoch": 0.019878863177512036, "grad_norm": 0.5078551769256592, "learning_rate": 9.977873061452552e-05, "loss": 1.4639, "step": 8},
    {"epoch": 0.02236372107470104, "grad_norm": 0.7216916084289551, "learning_rate": 9.96068831197139e-05, "loss": 1.626, "step": 9},
    {"epoch": 0.024848578971890044, "grad_norm": 0.9594578742980957, "learning_rate": 9.938625865312251e-05, "loss": 1.6584, "step": 10},
    {"epoch": 0.02733343686907905, "grad_norm": 1.2051019668579102, "learning_rate": 9.911709846436641e-05, "loss": 1.7222, "step": 11},
    {"epoch": 0.029818294766268055, "grad_norm": 1.6941516399383545, "learning_rate": 9.879969687616027e-05, "loss": 1.9584, "step": 12},
    {"epoch": 0.03230315266345706, "grad_norm": 0.5741978883743286, "learning_rate": 9.84344009624807e-05, "loss": 1.0769, "step": 13},
    {"epoch": 0.034788010560646065, "grad_norm": 0.7483494281768799, "learning_rate": 9.80216101690461e-05, "loss": 1.1724, "step": 14},
    {"epoch": 0.03727286845783507, "grad_norm": 0.6988890767097473, "learning_rate": 9.756177587652856e-05, "loss": 1.1815, "step": 15},
    {"epoch": 0.03975772635502407, "grad_norm": 0.6390421986579895, "learning_rate": 9.705540090697575e-05, "loss": 1.2393, "step": 16},
    {"epoch": 0.042242584252213077, "grad_norm": 0.5592739582061768, "learning_rate": 9.650303897398232e-05, "loss": 1.237, "step": 17},
    {"epoch": 0.04472744214940208, "grad_norm": 0.4472772479057312, "learning_rate": 9.590529407721231e-05, "loss": 1.225, "step": 18},
    {"epoch": 0.047212300046591084, "grad_norm": 0.381583034992218, "learning_rate": 9.526281984193436e-05, "loss": 1.2886, "step": 19},
    {"epoch": 0.04969715794378009, "grad_norm": 0.896741509437561, "learning_rate": 9.4576318804292e-05, "loss": 1.3218, "step": 20},
    {"epoch": 0.05218201584096909, "grad_norm": 0.46586254239082336, "learning_rate": 9.384654164309083e-05, "loss": 1.5014, "step": 21},
    {"epoch": 0.0546668737381581, "grad_norm": 0.635338544845581, "learning_rate": 9.30742863589421e-05, "loss": 1.546, "step": 22},
    {"epoch": 0.057151731635347106, "grad_norm": 0.8390082716941833, "learning_rate": 9.226039740166091e-05, "loss": 1.5184, "step": 23},
    {"epoch": 0.05963658953253611, "grad_norm": 1.1133440732955933, "learning_rate": 9.140576474687264e-05, "loss": 1.878, "step": 24},
    {"epoch": 0.06212144742972511, "grad_norm": 2.014930009841919, "learning_rate": 9.051132292283771e-05, "loss": 2.3234, "step": 25},
    {"epoch": 0.06212144742972511, "eval_loss": 1.351180911064148, "eval_runtime": 1.1673, "eval_samples_per_second": 42.834, "eval_steps_per_second": 11.137, "step": 25},
    {"epoch": 0.06460630532691412, "grad_norm": 0.13552242517471313, "learning_rate": 8.957804998855866e-05, "loss": 1.03, "step": 26},
    {"epoch": 0.06709116322410312, "grad_norm": 0.14115220308303833, "learning_rate": 8.860696646428693e-05, "loss": 1.106, "step": 27},
    {"epoch": 0.06957602112129213, "grad_norm": 0.16067887842655182, "learning_rate": 8.759913421559902e-05, "loss": 1.1242, "step": 28},
    {"epoch": 0.07206087901848113, "grad_norm": 0.16710247099399567, "learning_rate": 8.655565529226198e-05, "loss": 1.1861, "step": 29},
    {"epoch": 0.07454573691567014, "grad_norm": 0.1886073648929596, "learning_rate": 8.547767072315835e-05, "loss": 1.1936, "step": 30},
    {"epoch": 0.07703059481285914, "grad_norm": 0.2157226949930191, "learning_rate": 8.436635926858759e-05, "loss": 1.2286, "step": 31},
    {"epoch": 0.07951545271004815, "grad_norm": 0.2546946406364441, "learning_rate": 8.322293613130917e-05, "loss": 1.2942, "step": 32},
    {"epoch": 0.08200031060723714, "grad_norm": 0.34159988164901733, "learning_rate": 8.204865162773613e-05, "loss": 1.3495, "step": 33},
    {"epoch": 0.08448516850442615, "grad_norm": 0.44573602080345154, "learning_rate": 8.084478982073247e-05, "loss": 1.4677, "step": 34},
    {"epoch": 0.08697002640161516, "grad_norm": 0.5484282970428467, "learning_rate": 7.961266711550922e-05, "loss": 1.4124, "step": 35},
    {"epoch": 0.08945488429880416, "grad_norm": 0.7390031218528748, "learning_rate": 7.835363082015468e-05, "loss": 1.6858, "step": 36},
    {"epoch": 0.09193974219599317, "grad_norm": 1.0373011827468872, "learning_rate": 7.706905767237288e-05, "loss": 1.7112, "step": 37},
    {"epoch": 0.09442460009318217, "grad_norm": 0.17684373259544373, "learning_rate": 7.576035233404096e-05, "loss": 1.0075, "step": 38},
    {"epoch": 0.09690945799037118, "grad_norm": 0.13694511353969574, "learning_rate": 7.442894585523218e-05, "loss": 1.1016, "step": 39},
    {"epoch": 0.09939431588756018, "grad_norm": 0.13813506066799164, "learning_rate": 7.307629410938363e-05, "loss": 1.0873, "step": 40},
    {"epoch": 0.10187917378474919, "grad_norm": 0.15622101724147797, "learning_rate": 7.170387620131993e-05, "loss": 1.1538, "step": 41},
    {"epoch": 0.10436403168193818, "grad_norm": 0.17008064687252045, "learning_rate": 7.031319284987394e-05, "loss": 1.2087, "step": 42},
    {"epoch": 0.1068488895791272, "grad_norm": 0.1911894828081131, "learning_rate": 6.890576474687263e-05, "loss": 1.2157, "step": 43},
    {"epoch": 0.1093337474763162, "grad_norm": 0.22054672241210938, "learning_rate": 6.7483130894283e-05, "loss": 1.263, "step": 44},
    {"epoch": 0.1118186053735052, "grad_norm": 0.2807726562023163, "learning_rate": 6.604684692133597e-05, "loss": 1.3281, "step": 45},
    {"epoch": 0.11430346327069421, "grad_norm": 0.3638734221458435, "learning_rate": 6.459848338346861e-05, "loss": 1.3941, "step": 46},
    {"epoch": 0.11678832116788321, "grad_norm": 0.5523617267608643, "learning_rate": 6.313962404494496e-05, "loss": 1.4622, "step": 47},
    {"epoch": 0.11927317906507222, "grad_norm": 0.6990809440612793, "learning_rate": 6.167186414703289e-05, "loss": 1.4193, "step": 48},
    {"epoch": 0.12175803696226122, "grad_norm": 1.1496556997299194, "learning_rate": 6.019680866363139e-05, "loss": 1.7219, "step": 49},
    {"epoch": 0.12424289485945023, "grad_norm": 2.33288836479187, "learning_rate": 5.8716070546254966e-05, "loss": 2.1245, "step": 50},
    {"epoch": 0.12424289485945023, "eval_loss": 1.3092597723007202, "eval_runtime": 1.1847, "eval_samples_per_second": 42.206, "eval_steps_per_second": 10.974, "step": 50},
    {"epoch": 0.12672775275663922, "grad_norm": 0.13320881128311157, "learning_rate": 5.7231268960295e-05, "loss": 1.0636, "step": 51},
    {"epoch": 0.12921261065382825, "grad_norm": 0.14497292041778564, "learning_rate": 5.574402751448614e-05, "loss": 1.135, "step": 52},
    {"epoch": 0.13169746855101724, "grad_norm": 0.16492624580860138, "learning_rate": 5.425597248551387e-05, "loss": 1.0798, "step": 53},
    {"epoch": 0.13418232644820624, "grad_norm": 0.18326567113399506, "learning_rate": 5.2768731039705e-05, "loss": 1.1425, "step": 54},
    {"epoch": 0.13666718434539524, "grad_norm": 0.2107713520526886, "learning_rate": 5.128392945374505e-05, "loss": 1.2191, "step": 55},
    {"epoch": 0.13915204224258426, "grad_norm": 0.2540586292743683, "learning_rate": 4.980319133636863e-05, "loss": 1.2378, "step": 56},
    {"epoch": 0.14163690013977326, "grad_norm": 0.2890605628490448, "learning_rate": 4.83281358529671e-05, "loss": 1.2228, "step": 57},
    {"epoch": 0.14412175803696226, "grad_norm": 0.3913767635822296, "learning_rate": 4.686037595505507e-05, "loss": 1.468, "step": 58},
    {"epoch": 0.14660661593415125, "grad_norm": 0.44723743200302124, "learning_rate": 4.54015166165314e-05, "loss": 1.3898, "step": 59},
    {"epoch": 0.14909147383134028, "grad_norm": 0.5722945332527161, "learning_rate": 4.395315307866405e-05, "loss": 1.4764, "step": 60},
    {"epoch": 0.15157633172852927, "grad_norm": 0.6735637784004211, "learning_rate": 4.2516869105717004e-05, "loss": 1.5408, "step": 61},
    {"epoch": 0.15406118962571827, "grad_norm": 0.9894400238990784, "learning_rate": 4.109423525312738e-05, "loss": 1.7832, "step": 62},
    {"epoch": 0.1565460475229073, "grad_norm": 0.14697420597076416, "learning_rate": 3.968680715012606e-05, "loss": 1.0738, "step": 63},
    {"epoch": 0.1590309054200963, "grad_norm": 0.12737371027469635, "learning_rate": 3.829612379868006e-05, "loss": 1.0667, "step": 64},
    {"epoch": 0.1615157633172853, "grad_norm": 0.14219443500041962, "learning_rate": 3.692370589061639e-05, "loss": 1.0997, "step": 65},
    {"epoch": 0.16400062121447428, "grad_norm": 0.15139789879322052, "learning_rate": 3.557105414476782e-05, "loss": 1.1539, "step": 66},
    {"epoch": 0.1664854791116633, "grad_norm": 0.15827913582324982, "learning_rate": 3.423964766595906e-05, "loss": 1.1753, "step": 67},
    {"epoch": 0.1689703370088523, "grad_norm": 0.19669124484062195, "learning_rate": 3.293094232762715e-05, "loss": 1.2182, "step": 68},
    {"epoch": 0.1714551949060413, "grad_norm": 0.2270824909210205, "learning_rate": 3.164636917984534e-05, "loss": 1.2668, "step": 69},
    {"epoch": 0.17394005280323033, "grad_norm": 0.2890926003456116, "learning_rate": 3.0387332884490805e-05, "loss": 1.3265, "step": 70},
    {"epoch": 0.17642491070041932, "grad_norm": 0.3860069811344147, "learning_rate": 2.9155210179267546e-05, "loss": 1.4004, "step": 71},
    {"epoch": 0.17890976859760832, "grad_norm": 0.4971885085105896, "learning_rate": 2.7951348372263875e-05, "loss": 1.4714, "step": 72},
    {"epoch": 0.18139462649479732, "grad_norm": 0.6487317681312561, "learning_rate": 2.677706386869083e-05, "loss": 1.5419, "step": 73},
    {"epoch": 0.18387948439198634, "grad_norm": 0.8211824893951416, "learning_rate": 2.5633640731412412e-05, "loss": 1.5489, "step": 74},
    {"epoch": 0.18636434228917534, "grad_norm": 1.7613846063613892, "learning_rate": 2.4522329276841663e-05, "loss": 2.1532, "step": 75},
    {"epoch": 0.18636434228917534, "eval_loss": 1.3022475242614746, "eval_runtime": 1.1687, "eval_samples_per_second": 42.783, "eval_steps_per_second": 11.124, "step": 75},
    {"epoch": 0.18884920018636434, "grad_norm": 0.11220304667949677, "learning_rate": 2.3444344707738015e-05, "loss": 1.047, "step": 76},
    {"epoch": 0.19133405808355336, "grad_norm": 0.1274535059928894, "learning_rate": 2.2400865784401e-05, "loss": 1.1196, "step": 77},
    {"epoch": 0.19381891598074236, "grad_norm": 0.13804402947425842, "learning_rate": 2.1393033535713093e-05, "loss": 1.1235, "step": 78},
    {"epoch": 0.19630377387793135, "grad_norm": 0.14971572160720825, "learning_rate": 2.0421950011441354e-05, "loss": 1.1777, "step": 79},
    {"epoch": 0.19878863177512035, "grad_norm": 0.18402217328548431, "learning_rate": 1.9488677077162295e-05, "loss": 1.179, "step": 80},
    {"epoch": 0.20127348967230937, "grad_norm": 0.21275171637535095, "learning_rate": 1.8594235253127375e-05, "loss": 1.2864, "step": 81},
    {"epoch": 0.20375834756949837, "grad_norm": 0.2665954530239105, "learning_rate": 1.77396025983391e-05, "loss": 1.2847, "step": 82},
    {"epoch": 0.20624320546668737, "grad_norm": 0.3545299768447876, "learning_rate": 1.6925713641057904e-05, "loss": 1.4364, "step": 83},
    {"epoch": 0.20872806336387636, "grad_norm": 0.44620993733406067, "learning_rate": 1.6153458356909176e-05, "loss": 1.4322, "step": 84},
    {"epoch": 0.2112129212610654, "grad_norm": 0.5365117788314819, "learning_rate": 1.5423681195707997e-05, "loss": 1.3801, "step": 85},
    {"epoch": 0.2136977791582544, "grad_norm": 0.6859667897224426, "learning_rate": 1.4737180158065644e-05, "loss": 1.5157, "step": 86},
    {"epoch": 0.21618263705544338, "grad_norm": 1.0796610116958618, "learning_rate": 1.4094705922787687e-05, "loss": 1.8931, "step": 87},
    {"epoch": 0.2186674949526324, "grad_norm": 0.15191839635372162, "learning_rate": 1.3496961026017687e-05, "loss": 1.0785, "step": 88},
    {"epoch": 0.2211523528498214, "grad_norm": 0.12437998503446579, "learning_rate": 1.2944599093024267e-05, "loss": 1.0752, "step": 89},
    {"epoch": 0.2236372107470104, "grad_norm": 0.12759096920490265, "learning_rate": 1.2438224123471442e-05, "loss": 1.0812, "step": 90},
    {"epoch": 0.2261220686441994, "grad_norm": 0.14436273276805878, "learning_rate": 1.1978389830953907e-05, "loss": 1.1591, "step": 91},
    {"epoch": 0.22860692654138842, "grad_norm": 0.1650199145078659, "learning_rate": 1.1565599037519316e-05, "loss": 1.1473, "step": 92},
    {"epoch": 0.23109178443857742, "grad_norm": 0.1937108039855957, "learning_rate": 1.1200303123839742e-05, "loss": 1.2184, "step": 93},
    {"epoch": 0.23357664233576642, "grad_norm": 0.2247273474931717, "learning_rate": 1.088290153563358e-05, "loss": 1.3174, "step": 94},
    {"epoch": 0.23606150023295544, "grad_norm": 0.2764604687690735, "learning_rate": 1.0613741346877497e-05, "loss": 1.3146, "step": 95},
    {"epoch": 0.23854635813014444, "grad_norm": 0.359423965215683, "learning_rate": 1.0393116880286118e-05, "loss": 1.4263, "step": 96},
    {"epoch": 0.24103121602733343, "grad_norm": 0.4706696569919586, "learning_rate": 1.0221269385474488e-05, "loss": 1.3916, "step": 97},
    {"epoch": 0.24351607392452243, "grad_norm": 0.598770797252655, "learning_rate": 1.0098386775155147e-05, "loss": 1.575, "step": 98},
    {"epoch": 0.24600093182171145, "grad_norm": 0.7586723566055298, "learning_rate": 1.0024603419658329e-05, "loss": 1.5519, "step": 99},
    {"epoch": 0.24848578971890045, "grad_norm": 1.7414568662643433, "learning_rate": 1e-05, "loss": 2.0062, "step": 100},
    {"epoch": 0.24848578971890045, "eval_loss": 1.296454668045044, "eval_runtime": 1.1762, "eval_samples_per_second": 42.511, "eval_steps_per_second": 11.053, "step": 100}
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.666832784162816e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
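The evaluation losses recorded in the log above can be pulled out programmatically. A small sketch, assuming the checkpoint files have been downloaded so that `last-checkpoint/trainer_state.json` exists locally:

```python
# Print the eval_loss recorded at each evaluation step in trainer_state.json.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_loss" in entry:  # eval entries carry eval_* keys; train entries carry loss/grad_norm
        print(f"step {entry['step']:>3}: eval_loss = {entry['eval_loss']:.4f}")
```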
last-checkpoint/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8c49d1e5046f0c3bd27dd2a46c98109dc49b24e50465543e10efbb4397220f5e
size 6840
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
size 499723
tokenizer_config.json
ADDED
@@ -0,0 +1,43 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": null,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": false,
  "model_max_length": 2048,
  "pad_token": "</s>",
  "padding_side": "right",
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}
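The `chat_template` stored above is a Llama-3-style template (matching `chat_template: llama3` in the axolotl config) rather than TinyLlama's default chat format. A minimal sketch of rendering a conversation with it; the repo id shown is the hub id from the config and is an assumption, so substitute the id of this duplicated repo if needed:

```python
# Render a chat prompt with the tokenizer's stored chat_template (illustrative sketch).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sn56/35f01a57-b51b-4bdd-a301-acb8ff427029")

messages = [{"role": "user", "content": "Summarize LoRA fine-tuning in one sentence."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # shows the <|start_header_id|> ... <|eot_id|> formatting produced by the template
```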
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8c49d1e5046f0c3bd27dd2a46c98109dc49b24e50465543e10efbb4397220f5e
size 6840