{
"dataset_name": "kmfoda/booksum",
"head_dim": 128,
"model_name": "Qwen/Qwen3-4B-Instruct-2507",
"n_sink": 4,
"num_heads": 32,
"num_layers": 36,
"num_samples": 100,
"sample_seq_len": 1000
}
{
"dataset_name": "kmfoda/booksum",
"head_dim": 128,
"model_name": "Qwen/Qwen3-4B-Instruct-2507",
"n_sink": 4,
"num_heads": 32,
"num_layers": 36,
"num_samples": 100,
"sample_seq_len": 1000
}