-
Notifications
You must be signed in to change notification settings - Fork 39
/
results.jsonl
102 lines (102 loc) · 623 KB
/
results.jsonl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
{"date": "2024-01-07 22:29:36.008373", "total": 512, "seed": 0, "exp_name": "archangel_unaligned_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 14, "lengths": [1153, 1153, 1153, 565, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1151, 1153, 1153, 1153, 1152, 1153, 1153, 1153, 342, 1153, 82, 1153, 1153, 1153, 1153, 1153, 1152, 1153, 1153, 822, 830, 830, 830, 831, 830, 830, 830, 830, 830, 830, 828, 830, 830, 830, 830, 828, 830, 830, 309, 830, 610, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 58, 1522, 1418, 1522, 1522, 1522, 1522, 1522, 1522, 756, 1522, 1522, 1356, 1390, 1522, 1522, 1522, 1522, 1521, 1522, 1522, 1522, 1522, 923, 1522, 1522, 1522, 1522, 467, 735, 1522, 1522, 1232, 1232, 1209, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 180, 1232, 1232, 1232, 559, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1108, 1232, 1232, 1231, 1232, 1229, 1024, 153, 1024, 1024, 1024, 1024, 1024, 1017, 1024, 1024, 1015, 633, 271, 1022, 1024, 1023, 1024, 1024, 874, 1024, 916, 531, 1024, 1024, 634, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 830, 830, 830, 563, 830, 830, 830, 830, 830, 222, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 21, 830, 830, 830, 830, 830, 1622, 1064, 1611, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 555, 1622, 449, 1622, 1622, 1621, 1622, 1622, 1622, 946, 1622, 1621, 1622, 1622, 1622, 1622, 1622, 1621, 553, 1618, 1125, 1125, 1125, 1125, 1125, 1125, 311, 1125, 1125, 1125, 1125, 1089, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1124, 1125, 1125, 102, 1125, 1125, 1125, 1125, 4, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 462, 463, 463, 462, 463, 463, 463, 463, 337, 463, 463, 463, 463, 463, 463, 463, 463, 463, 34, 463, 463, 1340, 476, 1340, 1340, 627, 745, 1320, 1322, 746, 320, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 417, 94, 1339, 1340, 1340, 1340, 1340, 1340, 1340, 1339, 1434, 1434, 1434, 417, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1416, 1434, 889, 1434, 975, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1433, 1433, 1433, 1434, 1434, 1434, 1434, 855, 855, 855, 855, 855, 855, 732, 855, 853, 855, 855, 474, 839, 259, 855, 855, 855, 846, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 668, 1031, 1012, 1031, 660, 1031, 1031, 1031, 294, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 179, 1031, 1031, 1031, 1030, 1030, 1031, 1031, 1031, 1031, 1031, 1030, 1031, 1031, 1031, 1029, 1289, 1289, 1289, 1289, 1289, 1289, 1288, 1289, 1289, 1288, 1289, 445, 1289, 1288, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1281, 1289, 1289, 1289, 1289, 1289, 1290, 1289, 1638, 216, 1638, 1638, 1638, 1638, 1638, 1639, 1195, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1617, 1638, 1638, 1638, 1638, 1638, 1638, 1612, 1638, 1638, 1638, 1638, 1637, 1638, 1638, 1638, 1101, 1102, 1102, 1102, 1102, 1098, 327, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1101, 1102, 1102, 1101, 1102, 1102, 1102, 1102, 164, 1102, 197, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 484, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_unaligned_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 42815, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-07 22:43:41.759875", "total": 512, "seed": 0, "exp_name": "archangel_unaligned_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 31, "lengths": [1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 599, 1152, 1152, 1153, 863, 1153, 1153, 1153, 1153, 468, 1153, 374, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 830, 830, 830, 830, 479, 830, 827, 830, 830, 127, 829, 830, 830, 830, 830, 248, 821, 830, 830, 830, 830, 830, 830, 830, 830, 665, 830, 830, 830, 830, 830, 830, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 250, 1522, 604, 1522, 1522, 1522, 204, 1522, 1522, 1522, 1521, 1522, 1522, 1522, 1522, 1522, 1522, 805, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1232, 1231, 1232, 1232, 1230, 1232, 770, 1232, 1232, 1232, 1232, 1233, 775, 1232, 1232, 1232, 1176, 1232, 1232, 1232, 1232, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1025, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 825, 1024, 1024, 1024, 1024, 1024, 830, 830, 830, 508, 829, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 661, 830, 831, 830, 830, 830, 292, 830, 830, 830, 830, 50, 1622, 1622, 1622, 1621, 1622, 1622, 1623, 869, 1621, 1622, 1622, 1622, 1622, 75, 1622, 314, 221, 379, 1622, 1622, 1622, 1622, 1622, 1621, 1338, 1622, 1622, 1622, 677, 1094, 1622, 745, 1125, 1125, 1125, 1125, 197, 1125, 1125, 1125, 1125, 1125, 1125, 768, 1125, 870, 1125, 1125, 1125, 1125, 1125, 1125, 930, 1125, 1125, 1125, 1125, 539, 1125, 357, 454, 1125, 206, 1125, 460, 463, 463, 463, 367, 354, 463, 463, 463, 463, 463, 463, 463, 463, 463, 392, 463, 463, 463, 463, 463, 463, 463, 463, 463, 418, 463, 463, 463, 107, 463, 463, 397, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1248, 1340, 1340, 1340, 932, 1340, 1340, 1340, 60, 357, 1340, 1340, 688, 1339, 1340, 319, 643, 1340, 1340, 1340, 1340, 1340, 1340, 1226, 1434, 1434, 1434, 1434, 1434, 1434, 1432, 1433, 1434, 1434, 1434, 961, 1432, 1434, 961, 1434, 1402, 1434, 1434, 1008, 1434, 1434, 1434, 1434, 1434, 1434, 309, 1434, 1434, 1434, 1434, 855, 855, 855, 855, 855, 855, 557, 855, 853, 855, 855, 396, 855, 855, 855, 855, 855, 854, 855, 644, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 30, 1031, 584, 230, 1031, 1030, 943, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1029, 1031, 1023, 1031, 1031, 1031, 1031, 1031, 1031, 671, 1289, 1289, 1289, 440, 1289, 1289, 1289, 1288, 1289, 1289, 1289, 1065, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1277, 1289, 1289, 1289, 331, 176, 1289, 1289, 1289, 1289, 1638, 1638, 234, 1638, 1517, 1638, 1638, 1638, 151, 1585, 1638, 1638, 1638, 1638, 300, 333, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 209, 1638, 1637, 1638, 1638, 1266, 1638, 1638, 1638, 1102, 1102, 118, 1102, 1102, 1102, 1102, 547, 1102, 1102, 1102, 1101, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 438, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 235]}, "baseline": {"name": "chosen", "wins": 467, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_unaligned_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 41931, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-07 22:57:53.810633", "total": 512, "seed": 0, "exp_name": "archangel_unaligned_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 33, "lengths": [1153, 1153, 1152, 1153, 1153, 1153, 806, 1153, 1153, 1153, 1128, 1153, 1152, 1153, 1153, 447, 1153, 1131, 1153, 341, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1152, 1153, 1153, 432, 830, 830, 830, 830, 830, 830, 830, 830, 831, 830, 830, 829, 37, 830, 830, 827, 830, 830, 503, 830, 401, 830, 830, 830, 830, 237, 830, 830, 830, 830, 830, 467, 1522, 1521, 354, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 513, 1522, 1522, 1522, 1522, 1519, 852, 1522, 204, 751, 1320, 1522, 692, 1522, 1477, 1522, 1522, 1522, 1232, 1231, 1232, 1150, 1232, 1232, 1231, 1232, 622, 1232, 1232, 1232, 756, 1232, 100, 1232, 1232, 1232, 1232, 1225, 1232, 1232, 1232, 1231, 1232, 1232, 1232, 1232, 599, 1232, 984, 1232, 1024, 403, 1024, 1024, 1024, 951, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1023, 1024, 965, 1024, 1024, 1024, 1024, 1024, 164, 1024, 1024, 1024, 1024, 830, 830, 830, 830, 830, 830, 830, 830, 828, 829, 720, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 1622, 1621, 569, 840, 1622, 1622, 1622, 1452, 1622, 103, 526, 665, 1621, 1622, 88, 1622, 1622, 1622, 1622, 1622, 1621, 241, 1622, 1622, 1550, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1125, 1125, 1125, 1125, 507, 1125, 1125, 1125, 1125, 810, 1125, 1119, 1123, 1125, 1125, 1125, 1124, 188, 1125, 1125, 1125, 1125, 623, 1125, 258, 1125, 1125, 1125, 455, 1125, 1125, 1125, 4, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 146, 463, 463, 1208, 1340, 1340, 1340, 1340, 1340, 1340, 1008, 1340, 799, 1340, 1340, 1340, 1340, 1340, 1340, 1336, 1340, 1340, 1340, 1340, 1340, 1340, 5, 1340, 1340, 154, 1340, 1340, 295, 242, 1340, 857, 1434, 1434, 1434, 1434, 1434, 1434, 426, 322, 1434, 299, 81, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1269, 1434, 1434, 110, 1434, 1434, 1434, 1434, 1434, 1416, 1434, 1434, 1434, 855, 855, 855, 855, 855, 855, 855, 855, 416, 855, 855, 855, 855, 734, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 854, 855, 855, 855, 624, 854, 855, 1031, 1031, 1031, 1031, 416, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 432, 1030, 34, 1031, 1031, 1030, 1029, 1030, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1289, 1289, 1289, 427, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 523, 1289, 1289, 1289, 621, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 603, 320, 64, 1288, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1637, 1638, 1638, 1638, 1638, 96, 1638, 1638, 1638, 1638, 1637, 1638, 1638, 1638, 1638, 1638, 1638, 1639, 419, 714, 1638, 1638, 580, 1638, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 361, 1102, 1063, 20, 1102, 660, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 318, 1102, 1102, 1102, 193]}, "baseline": {"name": "chosen", "wins": 460, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_unaligned_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 42905, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-07 23:12:06.366979", "total": 512, "seed": 0, "exp_name": "archangel_unaligned_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 41, "lengths": [1153, 1153, 1153, 1153, 1153, 1153, 1153, 692, 1153, 1086, 1153, 1153, 461, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 431, 1153, 538, 1153, 1153, 1153, 1153, 1153, 830, 829, 114, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 704, 532, 829, 829, 830, 830, 830, 204, 830, 830, 830, 830, 830, 829, 830, 830, 830, 830, 830, 553, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1416, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1523, 64, 30, 1522, 1522, 1232, 1232, 130, 1232, 566, 1232, 136, 1232, 1232, 1232, 1232, 1232, 618, 1232, 4, 1232, 1232, 1232, 1232, 1232, 1232, 1228, 1232, 1232, 1232, 1230, 1232, 1232, 1232, 1232, 1232, 1232, 1024, 1024, 1023, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 31, 1024, 738, 1024, 1024, 1024, 1024, 1024, 1024, 1022, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 6, 1024, 1024, 1024, 523, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 829, 830, 214, 830, 830, 830, 829, 830, 830, 830, 830, 830, 830, 829, 1622, 439, 425, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 67, 1622, 884, 1622, 502, 1622, 1622, 1622, 1621, 1622, 1622, 1622, 1318, 1622, 1622, 1622, 1622, 239, 1622, 945, 1125, 504, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1124, 1125, 1125, 1124, 1125, 1125, 1125, 1125, 1125, 342, 1125, 1122, 1125, 233, 4, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 461, 463, 463, 292, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 1340, 1340, 1340, 1340, 1340, 242, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1339, 217, 1340, 219, 1340, 1340, 1340, 1340, 604, 859, 818, 1340, 1340, 1340, 1340, 1340, 462, 1340, 896, 1434, 1434, 984, 1434, 1062, 1434, 1424, 1434, 1434, 1434, 369, 439, 295, 830, 709, 415, 144, 1294, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 693, 1434, 818, 1434, 1434, 1434, 855, 855, 852, 855, 855, 855, 855, 855, 855, 96, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 287, 855, 855, 855, 855, 855, 855, 855, 1031, 1031, 928, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1032, 1031, 1031, 1031, 1031, 1030, 1031, 0, 1031, 1031, 1031, 1031, 1031, 1031, 1030, 1031, 1031, 1289, 1289, 1289, 1285, 1289, 1289, 1289, 452, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1287, 1289, 1289, 1289, 1288, 1289, 1289, 1030, 1289, 1274, 1289, 543, 1289, 1287, 1289, 1289, 1337, 1638, 1638, 1440, 739, 292, 1638, 1638, 1638, 1637, 1638, 1637, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 855, 270, 1638, 1638, 1638, 189, 401, 1638, 1638, 764, 1638, 1638, 1638, 1102, 1102, 1028, 1102, 1102, 1102, 1102, 1102, 1102, 81, 1102, 1102, 1102, 1102, 1102, 1102, 1101, 172, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 455, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_unaligned_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 43405, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-07 23:26:11.850078", "total": 512, "seed": 0, "exp_name": "archangel_unaligned_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 144, "lengths": [1086, 229, 1086, 1085, 993, 1086, 1086, 1086, 1086, 1086, 1086, 1085, 1086, 1086, 1086, 1086, 222, 664, 899, 1086, 1059, 1085, 1086, 1086, 217, 1086, 112, 1085, 1086, 1086, 856, 211, 742, 742, 742, 215, 742, 742, 742, 742, 742, 742, 742, 606, 180, 679, 377, 741, 742, 741, 742, 742, 741, 289, 742, 742, 307, 743, 742, 742, 741, 742, 742, 742, 314, 181, 1464, 877, 697, 1464, 1464, 28, 842, 1464, 1464, 1464, 1463, 285, 1464, 1464, 883, 1464, 1464, 1463, 1464, 835, 1463, 1465, 1463, 1464, 1464, 1464, 1464, 700, 213, 1464, 997, 699, 481, 997, 997, 143, 42, 997, 997, 997, 998, 997, 997, 997, 36, 996, 997, 997, 309, 997, 997, 996, 997, 997, 997, 997, 565, 997, 543, 997, 996, 997, 1025, 1025, 233, 715, 1026, 1025, 487, 1025, 1025, 1025, 97, 1024, 1025, 780, 1025, 1025, 1026, 1025, 1025, 474, 1024, 1025, 1025, 1024, 659, 1024, 745, 357, 1025, 1024, 1025, 1025, 814, 813, 188, 814, 813, 640, 814, 338, 814, 215, 814, 814, 814, 554, 814, 500, 814, 814, 814, 814, 813, 814, 814, 800, 813, 814, 771, 267, 814, 217, 814, 15, 1141, 1590, 1192, 1590, 940, 1590, 623, 1589, 1590, 1590, 1590, 1220, 571, 21, 1589, 1590, 59, 81, 1590, 87, 418, 1590, 1590, 1590, 1590, 1268, 112, 1591, 1100, 1590, 1009, 1590, 400, 100, 1026, 16, 1026, 1025, 1018, 1027, 1026, 595, 1026, 1026, 213, 1026, 1027, 480, 1026, 1026, 1026, 544, 192, 287, 536, 1026, 1026, 1026, 1025, 761, 163, 208, 772, 1026, 240, 240, 240, 240, 240, 240, 240, 240, 241, 240, 240, 240, 240, 240, 241, 240, 240, 240, 240, 240, 208, 240, 240, 240, 240, 240, 239, 240, 240, 240, 240, 240, 1198, 114, 1283, 14, 1282, 1283, 1283, 402, 251, 1031, 1283, 766, 1282, 1283, 830, 1283, 1284, 721, 202, 805, 1283, 1283, 1283, 17, 249, 574, 86, 1283, 285, 1282, 1283, 350, 1374, 152, 365, 1374, 1375, 1375, 1375, 1375, 1375, 183, 1375, 718, 1375, 1375, 728, 1057, 472, 1375, 1375, 1374, 1375, 1375, 844, 1375, 1375, 1375, 1375, 1375, 1376, 182, 905, 1375, 683, 683, 683, 683, 660, 424, 683, 683, 683, 683, 683, 253, 683, 12, 213, 359, 684, 683, 661, 683, 683, 77, 683, 683, 683, 260, 683, 683, 683, 682, 683, 115, 915, 615, 390, 915, 915, 915, 915, 915, 330, 915, 915, 915, 915, 26, 915, 353, 702, 913, 915, 915, 917, 642, 915, 914, 915, 169, 915, 410, 915, 914, 915, 913, 914, 412, 689, 1220, 1220, 4, 1220, 1219, 588, 1220, 1220, 1220, 1220, 1220, 130, 1220, 1220, 1220, 1220, 1220, 1140, 352, 47, 1220, 1220, 682, 287, 487, 1219, 1220, 587, 961, 1593, 1593, 842, 416, 1593, 1593, 88, 753, 373, 1591, 629, 1593, 951, 1075, 1592, 1593, 309, 111, 1593, 1593, 1593, 1592, 1593, 3, 1593, 896, 459, 1593, 1593, 636, 1593, 1593, 319, 583, 285, 962, 962, 962, 962, 963, 962, 961, 457, 962, 962, 515, 962, 962, 398, 962, 209, 586, 197, 962, 962, 962, 962, 472, 962, 613, 962, 962, 440, 60]}, "baseline": {"name": "chosen", "wins": 341, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_unaligned_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 57421, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-07 23:40:28.074105", "total": 512, "seed": 0, "exp_name": "archangel_unaligned_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 149, "lengths": [1085, 1086, 727, 1086, 1086, 1086, 1086, 1086, 1086, 1086, 1085, 1085, 1086, 1086, 1086, 1086, 801, 1086, 1086, 353, 1086, 1086, 1086, 1085, 227, 1086, 1086, 1085, 1086, 1086, 1086, 1086, 742, 742, 742, 717, 742, 742, 742, 742, 742, 742, 742, 742, 742, 742, 742, 741, 742, 741, 742, 685, 741, 742, 742, 742, 742, 741, 622, 741, 741, 742, 742, 742, 1464, 195, 1464, 1464, 1464, 1464, 1464, 1464, 1464, 1464, 1464, 1464, 1464, 1464, 1463, 1464, 1463, 1464, 1464, 1464, 1464, 1464, 1464, 1465, 1463, 697, 1464, 1464, 1464, 1464, 1463, 1462, 997, 554, 997, 997, 997, 997, 997, 997, 997, 997, 994, 997, 997, 997, 996, 996, 997, 997, 997, 997, 997, 997, 997, 997, 997, 963, 998, 997, 997, 997, 996, 291, 1024, 1025, 1025, 887, 774, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1026, 1025, 1023, 1025, 1024, 1025, 1025, 1025, 386, 1024, 1025, 203, 1025, 1024, 1025, 1025, 814, 813, 814, 814, 813, 813, 814, 813, 814, 814, 814, 814, 814, 814, 814, 814, 814, 814, 814, 814, 813, 814, 814, 814, 813, 678, 814, 22, 814, 813, 817, 814, 1588, 1590, 1590, 1590, 862, 1590, 1590, 1590, 1590, 1590, 1590, 1590, 1590, 1590, 1225, 1590, 954, 1590, 1590, 1590, 481, 1590, 1590, 1590, 583, 1590, 1590, 1590, 1590, 1590, 1590, 1590, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 963, 1026, 1026, 919, 213, 1026, 1026, 143, 1026, 1026, 1026, 1025, 1025, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1025, 1026, 240, 240, 240, 240, 240, 240, 127, 240, 240, 240, 240, 239, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 239, 240, 238, 230, 240, 240, 1283, 463, 1283, 129, 1282, 1283, 1283, 1283, 1283, 1283, 1283, 1283, 1283, 728, 1283, 1283, 1282, 489, 1283, 1283, 1283, 1283, 1283, 30, 1283, 1086, 1283, 1283, 1282, 1282, 1283, 1283, 1374, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1375, 1374, 281, 1375, 1375, 1375, 1375, 945, 1312, 1375, 244, 1375, 1375, 1375, 1375, 1375, 1375, 594, 1373, 1375, 1375, 1375, 1375, 683, 683, 683, 683, 683, 620, 683, 683, 683, 683, 683, 342, 683, 683, 683, 683, 683, 683, 467, 683, 683, 683, 683, 683, 683, 683, 683, 683, 475, 682, 683, 683, 915, 714, 915, 915, 915, 915, 915, 915, 915, 915, 915, 915, 915, 914, 915, 915, 915, 914, 915, 915, 915, 915, 915, 915, 915, 915, 915, 914, 915, 914, 915, 915, 1212, 1220, 1220, 1220, 1220, 1220, 1219, 984, 1219, 1220, 1220, 1220, 1220, 1220, 79, 1220, 1220, 1220, 1153, 1220, 1220, 1220, 869, 266, 1220, 1220, 1219, 1220, 1219, 1220, 1220, 1220, 1594, 1593, 1593, 509, 1593, 1593, 1591, 1593, 230, 1593, 1593, 1593, 1593, 1593, 1592, 1593, 1593, 1593, 1593, 1593, 760, 1592, 1119, 1592, 1593, 1593, 1593, 1593, 1593, 1593, 1593, 1593, 535, 962, 962, 962, 962, 962, 961, 962, 962, 962, 962, 962, 962, 145, 962, 962, 962, 962, 961, 962, 962, 962, 962, 962, 962, 962, 962, 962, 962, 962, 15, 962]}, "baseline": {"name": "chosen", "wins": 344, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_unaligned_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 33831, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-07 23:54:42.964892", "total": 512, "seed": 0, "exp_name": "archangel_unaligned_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 187, "lengths": [1223, 85, 238, 1460, 1460, 1460, 1460, 202, 1460, 1460, 764, 1459, 1460, 432, 1460, 756, 955, 1086, 558, 483, 474, 193, 1086, 529, 848, 1086, 1086, 1085, 85, 323, 146, 1085, 368, 446, 1404, 262, 1404, 1404, 1404, 815, 1403, 1404, 394, 452, 1403, 1398, 1404, 150, 743, 397, 300, 742, 742, 742, 438, 742, 742, 699, 21, 742, 742, 741, 742, 105, 1510, 708, 1510, 1511, 290, 696, 1090, 1511, 1511, 1512, 1511, 1511, 1511, 1511, 1299, 1168, 1463, 846, 134, 1464, 1464, 1464, 1464, 497, 30, 1464, 566, 1464, 1464, 1464, 1464, 1464, 997, 775, 320, 997, 746, 843, 93, 997, 996, 655, 997, 997, 689, 997, 14, 169, 313, 1319, 410, 171, 1485, 377, 1485, 1485, 1485, 882, 1485, 1485, 1485, 1347, 1485, 72, 90, 1574, 1574, 1575, 1574, 51, 1574, 1574, 1218, 270, 1575, 1574, 1574, 521, 1213, 634, 1026, 1025, 1025, 1025, 1024, 1024, 1025, 1025, 1025, 1025, 982, 32, 96, 1025, 1025, 923, 813, 813, 814, 814, 517, 814, 125, 814, 814, 288, 436, 814, 814, 814, 77, 814, 1455, 1455, 1455, 1454, 1455, 1455, 303, 1455, 553, 70, 4, 333, 1455, 1455, 74, 1455, 1588, 1590, 1590, 1590, 1589, 550, 1589, 1590, 1590, 1590, 1590, 588, 216, 1588, 1589, 1590, 1711, 1711, 84, 1711, 553, 1151, 1711, 1711, 1711, 149, 306, 1710, 1711, 1711, 497, 1711, 1026, 779, 1026, 1026, 1026, 405, 1026, 1027, 1027, 1026, 1026, 1025, 154, 1026, 1026, 182, 1289, 1289, 172, 1289, 56, 1289, 549, 1289, 1289, 1289, 1289, 152, 1289, 1289, 1288, 1289, 6, 1224, 1357, 660, 200, 1358, 1358, 1358, 1358, 1358, 1358, 1358, 1358, 155, 796, 411, 239, 240, 240, 240, 191, 240, 240, 239, 240, 239, 240, 240, 240, 240, 240, 239, 334, 1525, 720, 12, 414, 301, 1525, 678, 1525, 113, 1525, 214, 313, 1441, 307, 1525, 1283, 215, 250, 1283, 1283, 1282, 318, 24, 1283, 1281, 1283, 1283, 271, 1282, 1283, 849, 238, 1375, 1358, 1375, 1375, 1375, 1373, 1375, 207, 1373, 1208, 1375, 137, 1375, 581, 1375, 1602, 1602, 1602, 1602, 456, 1602, 162, 1602, 1602, 1602, 1602, 1602, 1601, 1602, 540, 123, 540, 71, 366, 683, 683, 211, 208, 682, 454, 683, 682, 248, 682, 112, 682, 682, 1444, 1444, 1444, 1444, 138, 1444, 700, 371, 769, 1444, 1444, 1444, 1444, 147, 1444, 34, 1029, 948, 1316, 1316, 452, 1316, 1316, 77, 1101, 518, 1316, 1316, 1315, 536, 1316, 758, 915, 118, 455, 914, 821, 638, 916, 915, 915, 915, 915, 915, 915, 350, 915, 269, 1220, 1220, 965, 721, 1220, 301, 1220, 434, 1220, 1220, 1220, 1219, 1220, 1220, 1219, 998, 157, 1363, 403, 970, 1363, 1127, 1082, 1363, 1363, 1040, 45, 1291, 1364, 213, 1363, 1363, 1606, 804, 257, 1606, 1606, 1606, 285, 1033, 1605, 520, 1606, 1606, 1606, 1605, 146, 1606, 134, 465, 1593, 444, 1593, 471, 897, 31, 1593, 1593, 52, 816, 1593, 935, 1593, 1593, 1528, 1362, 908, 1528, 394, 1528, 1528, 1528, 1528, 640, 601, 1527, 472, 209, 366, 640, 962, 962, 962, 962, 962, 962, 962, 775, 962, 962, 962, 617, 66, 962, 233, 962]}, "baseline": {"name": "chosen", "wins": 301, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_unaligned_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 48575, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 00:07:50.319187", "total": 512, "seed": 0, "exp_name": "archangel_sft_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 77, "lengths": [85, 112, 64, 23, 20, 3, 47, 39, 127, 17, 104, 109, 86, 8, 51, 62, 100, 52, 40, 14, 193, 126, 117, 6, 146, 30, 5, 40, 3, 52, 66, 7, 100, 121, 48, 213, 14, 89, 111, 163, 18, 164, 4, 29, 290, 10, 204, 72, 199, 15, 97, 216, 78, 92, 26, 47, 97, 16, 31, 232, 125, 21, 235, 9, 87, 28, 79, 362, 68, 17, 208, 7, 20, 45, 26, 43, 523, 156, 10, 38, 9, 34, 63, 71, 73, 516, 16, 879, 52, 10, 34, 96, 5, 267, 143, 53, 64, 52, 44, 39, 241, 60, 56, 860, 16, 78, 8, 74, 226, 158, 58, 22, 36, 110, 98, 27, 63, 80, 52, 36, 35, 41, 57, 59, 73, 31, 7, 141, 33, 115, 27, 25, 65, 54, 14, 22, 373, 37, 80, 94, 358, 34, 175, 145, 476, 117, 55, 66, 36, 8, 103, 15, 242, 27, 48, 42, 108, 27, 44, 70, 165, 137, 151, 280, 24, 42, 9, 43, 168, 36, 406, 77, 168, 152, 23, 92, 47, 35, 24, 74, 17, 7, 48, 4, 129, 31, 18, 63, 25, 60, 27, 31, 17, 60, 27, 6, 117, 175, 95, 127, 139, 44, 7, 176, 54, 69, 79, 57, 13, 132, 230, 49, 76, 33, 222, 22, 67, 50, 87, 22, 1622, 149, 233, 94, 10, 7, 48, 59, 72, 99, 90, 95, 55, 38, 119, 174, 469, 463, 75, 56, 55, 4, 116, 39, 166, 109, 178, 12, 99, 40, 84, 98, 78, 84, 27, 128, 19, 463, 48, 189, 11, 41, 88, 328, 137, 10, 57, 78, 23, 21, 74, 119, 58, 91, 38, 58, 463, 66, 26, 42, 37, 90, 149, 39, 82, 216, 332, 124, 64, 301, 142, 39, 25, 34, 73, 66, 45, 120, 1340, 71, 21, 132, 19, 46, 51, 260, 24, 33, 13, 73, 675, 26, 119, 59, 26, 248, 49, 57, 105, 30, 70, 15, 71, 10, 95, 18, 5, 42, 32, 46, 151, 197, 29, 377, 30, 520, 296, 243, 29, 72, 90, 30, 108, 22, 74, 165, 22, 43, 45, 106, 43, 112, 31, 81, 88, 64, 717, 114, 55, 57, 45, 10, 32, 544, 42, 38, 14, 31, 83, 475, 40, 91, 38, 68, 13, 771, 194, 165, 62, 11, 18, 29, 57, 3, 69, 24, 39, 245, 312, 34, 90, 14, 265, 4, 33, 75, 20, 44, 35, 151, 22, 3, 17, 87, 62, 53, 71, 87, 268, 51, 19, 15, 267, 26, 132, 18, 103, 12, 147, 13, 889, 8, 22, 1289, 32, 181, 418, 24, 331, 112, 60, 27, 43, 18, 55, 19, 43, 220, 434, 61, 59, 67, 77, 80, 23, 16, 67, 53, 268, 1638, 224, 58, 194, 20, 138, 21, 215, 3, 221, 47, 95, 29, 22, 13, 108, 28, 92, 920, 83, 28, 8, 11, 122, 16, 49, 50, 132, 44, 382, 57, 237, 87, 20, 129, 91, 21, 87, 23, 82, 45, 75, 30, 45, 103, 154, 14, 73, 130, 24, 50, 2, 2, 77, 63, 119, 43, 29, 135, 23, 39, 290, 53]}, "baseline": {"name": "chosen", "wins": 401, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 42815, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 00:20:50.426232", "total": 512, "seed": 0, "exp_name": "archangel_sft_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 105, "lengths": [9, 682, 28, 35, 34, 23, 78, 97, 268, 37, 50, 94, 60, 22, 39, 99, 114, 73, 71, 92, 66, 89, 83, 8, 51, 58, 5, 41, 59, 16, 45, 61, 24, 137, 60, 72, 34, 310, 45, 30, 22, 96, 14, 54, 5, 130, 27, 133, 65, 6, 15, 246, 41, 467, 78, 6, 56, 7, 5, 118, 122, 21, 59, 144, 97, 73, 50, 207, 40, 111, 88, 23, 39, 26, 53, 26, 214, 178, 229, 91, 22, 24, 122, 201, 6, 138, 65, 161, 86, 62, 14, 50, 18, 23, 27, 165, 141, 455, 5, 11, 31, 83, 84, 138, 7, 89, 429, 107, 328, 219, 99, 38, 29, 31, 30, 34, 19, 15, 76, 16, 37, 119, 397, 232, 70, 115, 89, 15, 14, 146, 20, 148, 58, 60, 52, 141, 27, 90, 35, 99, 126, 21, 113, 8, 516, 146, 356, 811, 35, 13, 55, 57, 259, 15, 36, 13, 1023, 28, 46, 70, 304, 15, 194, 178, 29, 99, 42, 58, 44, 42, 113, 43, 22, 31, 29, 65, 302, 27, 78, 164, 141, 18, 87, 289, 309, 13, 0, 144, 45, 261, 30, 6, 21, 46, 69, 32, 37, 15, 44, 88, 223, 33, 170, 204, 871, 9, 25, 33, 61, 1622, 64, 71, 30, 76, 133, 41, 8, 174, 40, 35, 409, 86, 75, 133, 9, 367, 68, 17, 125, 108, 45, 192, 69, 8, 159, 613, 31, 693, 47, 44, 71, 29, 31, 79, 74, 124, 27, 325, 91, 1125, 196, 146, 93, 79, 5, 131, 4, 78, 127, 216, 22, 14, 282, 113, 119, 30, 36, 131, 5, 117, 40, 121, 349, 36, 57, 16, 111, 176, 463, 281, 103, 272, 98, 26, 15, 463, 186, 72, 144, 102, 68, 340, 232, 55, 43, 102, 413, 200, 94, 34, 14, 6, 410, 72, 23, 25, 104, 59, 93, 30, 16, 4, 45, 1204, 49, 25, 75, 161, 231, 174, 39, 150, 51, 136, 9, 9, 222, 122, 34, 54, 76, 188, 90, 103, 182, 88, 254, 89, 9, 213, 28, 62, 65, 88, 818, 42, 98, 233, 22, 211, 101, 20, 78, 23, 102, 7, 68, 91, 70, 144, 26, 35, 62, 174, 54, 5, 92, 45, 99, 305, 106, 51, 130, 111, 29, 67, 232, 80, 576, 89, 207, 191, 188, 43, 48, 31, 53, 15, 420, 25, 118, 204, 428, 46, 754, 28, 110, 43, 109, 328, 20, 28, 230, 71, 131, 303, 8, 41, 46, 67, 31, 90, 16, 1031, 38, 7, 79, 33, 198, 79, 6, 4, 91, 76, 37, 14, 286, 20, 42, 186, 21, 162, 17, 59, 118, 29, 85, 1289, 47, 70, 5, 167, 153, 21, 64, 22, 5, 26, 77, 87, 94, 50, 136, 16, 111, 27, 99, 31, 134, 137, 48, 168, 21, 70, 68, 22, 34, 108, 59, 35, 48, 4, 91, 60, 269, 75, 27, 84, 107, 170, 141, 49, 56, 923, 90, 18, 97, 65, 81, 203, 24, 24, 11, 148, 122, 35, 14, 1101, 81, 73, 39, 66, 115, 51, 72, 126, 93, 125, 19, 98, 5, 39]}, "baseline": {"name": "chosen", "wins": 370, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 41931, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 00:33:46.576944", "total": 512, "seed": 0, "exp_name": "archangel_sft_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 124, "lengths": [91, 108, 279, 10, 203, 67, 188, 69, 1153, 119, 188, 146, 20, 96, 28, 165, 231, 95, 109, 20, 196, 39, 105, 12, 80, 71, 4, 46, 22, 32, 42, 48, 9, 191, 19, 82, 16, 39, 208, 88, 24, 143, 117, 4, 125, 39, 26, 70, 23, 8, 31, 200, 40, 102, 4, 268, 292, 227, 6, 358, 153, 52, 99, 43, 105, 59, 41, 248, 67, 52, 56, 18, 66, 112, 12, 68, 433, 66, 51, 21, 91, 136, 45, 51, 9, 302, 149, 79, 16, 87, 52, 86, 14, 7, 64, 1074, 79, 52, 22, 81, 193, 57, 30, 116, 345, 89, 59, 122, 157, 30, 17, 34, 51, 232, 7, 71, 11, 112, 28, 18, 409, 71, 313, 63, 65, 19, 24, 25, 45, 76, 84, 58, 27, 22, 26, 63, 301, 47, 101, 7, 220, 136, 100, 34, 1024, 120, 122, 764, 162, 8, 26, 49, 80, 132, 45, 12, 15, 32, 13, 42, 113, 174, 830, 31, 4, 124, 80, 50, 12, 110, 72, 56, 63, 454, 94, 119, 67, 90, 22, 111, 95, 123, 69, 43, 37, 10, 3, 36, 16, 266, 28, 6, 16, 66, 12, 67, 44, 591, 212, 105, 80, 16, 31, 87, 142, 13, 17, 105, 47, 33, 31, 57, 93, 106, 225, 27, 98, 54, 330, 50, 170, 74, 102, 237, 21, 129, 70, 53, 130, 160, 3, 529, 112, 15, 8, 209, 66, 412, 295, 7, 21, 10, 75, 61, 59, 411, 188, 72, 46, 20, 63, 122, 83, 198, 4, 98, 4, 14, 89, 35, 13, 25, 92, 30, 463, 23, 28, 49, 7, 85, 108, 154, 71, 54, 14, 31, 64, 28, 68, 96, 79, 14, 43, 33, 39, 194, 89, 29, 155, 52, 15, 3, 6, 72, 18, 136, 14, 113, 26, 112, 77, 155, 125, 1340, 145, 55, 21, 35, 75, 52, 31, 6, 160, 243, 45, 25, 65, 45, 165, 241, 218, 27, 38, 189, 116, 100, 50, 34, 30, 9, 51, 128, 43, 137, 140, 492, 452, 121, 35, 63, 82, 33, 61, 73, 28, 29, 24, 110, 49, 64, 72, 18, 99, 120, 145, 173, 49, 23, 33, 8, 75, 43, 47, 72, 38, 187, 87, 58, 16, 19, 212, 46, 34, 158, 290, 37, 53, 18, 60, 7, 168, 72, 26, 9, 140, 50, 23, 74, 80, 51, 206, 27, 151, 60, 22, 81, 309, 89, 8, 274, 26, 18, 25, 32, 13, 408, 145, 60, 78, 47, 32, 59, 145, 56, 134, 20, 87, 32, 155, 25, 11, 11, 229, 1256, 56, 134, 22, 31, 38, 131, 278, 19, 17, 90, 105, 21, 25, 1289, 175, 33, 10, 70, 114, 39, 95, 8, 5, 72, 604, 31, 26, 21, 16, 20, 34, 61, 20, 31, 94, 50, 81, 87, 298, 97, 100, 173, 78, 27, 13, 51, 120, 63, 177, 59, 18, 94, 45, 58, 204, 72, 115, 25, 66, 256, 34, 18, 29, 27, 174, 169, 77, 92, 30, 153, 12, 30, 64, 119, 12, 131, 8, 7, 14, 35, 61, 1101, 133, 198, 180, 3, 1096, 72]}, "baseline": {"name": "chosen", "wins": 351, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 42905, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 00:46:43.135475", "total": 512, "seed": 0, "exp_name": "archangel_sft_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 97, "lengths": [21, 96, 26, 45, 63, 7, 68, 103, 109, 31, 74, 449, 196, 125, 1153, 100, 22, 39, 46, 29, 11, 7, 66, 21, 370, 21, 6, 45, 100, 31, 276, 224, 91, 107, 367, 143, 11, 30, 10, 69, 86, 54, 186, 88, 218, 26, 87, 190, 124, 58, 13, 69, 60, 32, 49, 45, 161, 165, 6, 8, 168, 8, 130, 107, 101, 34, 41, 257, 16, 50, 42, 120, 5, 28, 39, 151, 196, 101, 87, 28, 281, 91, 351, 40, 63, 73, 38, 41, 36, 163, 202, 293, 28, 6, 291, 595, 32, 35, 7, 41, 87, 87, 106, 161, 246, 196, 20, 91, 269, 59, 5, 121, 29, 158, 43, 73, 17, 46, 109, 39, 117, 44, 214, 79, 56, 271, 34, 540, 73, 0, 44, 22, 46, 42, 139, 12, 82, 83, 210, 1024, 185, 19, 845, 69, 430, 182, 691, 138, 91, 80, 39, 298, 157, 40, 18, 16, 329, 107, 42, 48, 34, 26, 136, 152, 16, 28, 47, 30, 71, 31, 69, 73, 34, 126, 15, 153, 110, 61, 78, 49, 57, 31, 51, 132, 576, 5, 102, 197, 32, 228, 202, 4, 64, 88, 31, 10, 122, 181, 43, 215, 89, 138, 51, 39, 82, 17, 214, 74, 65, 49, 65, 23, 68, 38, 482, 50, 291, 53, 35, 104, 11, 134, 30, 107, 359, 25, 10, 40, 157, 59, 75, 464, 67, 39, 98, 725, 52, 229, 37, 47, 89, 6, 77, 45, 65, 149, 24, 166, 51, 71, 51, 127, 1125, 61, 4, 80, 103, 43, 8, 74, 126, 38, 69, 138, 15, 64, 101, 58, 6, 84, 76, 90, 117, 62, 159, 19, 76, 464, 213, 39, 46, 50, 182, 76, 177, 174, 217, 201, 337, 606, 84, 34, 43, 158, 119, 110, 145, 207, 75, 80, 164, 79, 1340, 235, 38, 19, 580, 58, 90, 140, 42, 6, 135, 213, 7, 45, 33, 93, 62, 35, 35, 223, 85, 487, 33, 37, 146, 32, 4, 50, 90, 839, 182, 92, 395, 69, 36, 442, 163, 243, 32, 45, 94, 37, 50, 210, 7, 200, 76, 76, 69, 133, 192, 37, 26, 47, 43, 22, 424, 68, 26, 41, 72, 52, 51, 23, 49, 31, 88, 151, 7, 55, 662, 49, 855, 40, 107, 49, 161, 51, 82, 628, 86, 5, 276, 40, 35, 1031, 209, 309, 24, 44, 62, 109, 60, 18, 264, 112, 5, 68, 9, 10, 1031, 66, 120, 33, 1031, 84, 11, 14, 281, 17, 10, 27, 21, 20, 38, 98, 44, 11, 3, 14, 31, 211, 30, 35, 3, 266, 58, 139, 6, 81, 1289, 32, 82, 76, 15, 1289, 870, 173, 23, 132, 147, 23, 66, 57, 940, 124, 423, 25, 333, 169, 48, 70, 32, 30, 59, 56, 199, 83, 75, 63, 281, 55, 115, 40, 17, 124, 109, 32, 31, 72, 27, 82, 70, 37, 72, 25, 55, 79, 100, 14, 143, 284, 227, 18, 224, 89, 51, 23, 18, 60, 10, 35, 180, 1102, 278, 907, 9, 77, 4, 12, 22, 268, 67, 606, 87, 221, 8, 36, 2, 263]}, "baseline": {"name": "chosen", "wins": 375, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 43405, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 00:59:36.497737", "total": 512, "seed": 0, "exp_name": "archangel_sft_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 184, "lengths": [9, 65, 43, 12, 38, 11, 17, 55, 226, 496, 79, 36, 45, 44, 137, 10, 105, 63, 10, 115, 30, 43, 13, 15, 204, 314, 6, 17, 77, 171, 158, 12, 90, 73, 117, 18, 23, 37, 80, 91, 29, 456, 12, 105, 38, 67, 280, 95, 119, 12, 45, 171, 220, 87, 64, 46, 742, 215, 12, 37, 116, 408, 187, 15, 102, 73, 60, 50, 11, 9, 47, 66, 91, 14, 20, 8, 135, 24, 59, 17, 13, 37, 122, 260, 8, 119, 113, 11, 129, 25, 145, 153, 1067, 6, 222, 54, 144, 195, 11, 78, 8, 18, 31, 86, 9, 283, 66, 128, 82, 32, 12, 70, 36, 176, 20, 59, 50, 10, 54, 20, 43, 443, 143, 14, 183, 10, 95, 28, 6, 17, 58, 90, 104, 23, 40, 98, 25, 27, 17, 94, 133, 22, 184, 147, 464, 104, 229, 23, 83, 204, 105, 40, 316, 134, 19, 6, 11, 9, 12, 36, 7, 125, 105, 17, 451, 13, 196, 29, 13, 21, 22, 23, 29, 24, 23, 55, 104, 13, 244, 63, 118, 30, 11, 100, 43, 13, 15, 54, 38, 43, 90, 8, 45, 86, 62, 6, 49, 61, 121, 179, 142, 72, 43, 142, 229, 17, 30, 99, 19, 106, 27, 80, 66, 74, 186, 16, 137, 67, 32, 311, 90, 672, 63, 319, 30, 23, 32, 48, 97, 80, 22, 33, 88, 17, 67, 109, 40, 369, 112, 129, 41, 23, 127, 47, 11, 120, 219, 65, 15, 8, 61, 45, 190, 111, 1, 178, 44, 9, 16, 133, 240, 34, 115, 136, 29, 35, 70, 109, 13, 44, 223, 167, 50, 76, 50, 14, 133, 43, 125, 82, 47, 42, 127, 20, 110, 240, 156, 24, 127, 145, 105, 19, 156, 51, 26, 82, 181, 80, 59, 189, 101, 62, 28, 49, 207, 111, 22, 138, 59, 243, 57, 6, 267, 82, 20, 19, 23, 39, 68, 95, 25, 426, 7, 152, 69, 14, 59, 159, 13, 81, 60, 145, 11, 35, 125, 120, 16, 82, 17, 108, 27, 22, 134, 29, 77, 105, 103, 93, 105, 23, 18, 16, 130, 261, 154, 40, 79, 10, 92, 166, 33, 50, 60, 283, 22, 6, 121, 34, 47, 53, 24, 101, 129, 46, 123, 28, 25, 33, 101, 95, 203, 36, 94, 12, 86, 24, 71, 87, 46, 107, 142, 136, 188, 247, 303, 284, 57, 133, 8, 72, 102, 17, 27, 171, 33, 183, 16, 309, 278, 85, 15, 34, 14, 105, 35, 30, 497, 107, 49, 6, 21, 6, 28, 41, 174, 16, 68, 20, 13, 126, 10, 25, 17, 11, 157, 24, 11, 46, 42, 66, 10, 99, 77, 1220, 145, 48, 4, 9, 133, 140, 270, 10, 198, 15, 25, 78, 41, 50, 777, 234, 642, 92, 49, 50, 73, 45, 77, 88, 31, 8, 9, 39, 52, 11, 20, 172, 29, 54, 77, 100, 106, 84, 25, 328, 128, 32, 93, 25, 76, 24, 160, 113, 79, 62, 240, 25, 200, 296, 218, 249, 19, 17, 92, 36, 227, 52, 108, 86, 52, 11, 6, 125]}, "baseline": {"name": "chosen", "wins": 302, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 57421, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 01:12:43.111604", "total": 512, "seed": 0, "exp_name": "archangel_sft_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 190, "lengths": [13, 46, 59, 8, 41, 11, 101, 65, 353, 59, 33, 25, 68, 244, 147, 146, 38, 34, 10, 229, 16, 12, 19, 19, 32, 73, 6, 80, 21, 85, 131, 12, 380, 213, 102, 46, 12, 74, 167, 45, 11, 76, 99, 33, 30, 156, 76, 132, 150, 18, 13, 201, 87, 65, 45, 4, 315, 135, 6, 185, 94, 43, 16, 42, 127, 48, 50, 93, 26, 103, 63, 182, 58, 61, 153, 23, 272, 20, 35, 128, 6, 46, 68, 209, 53, 119, 66, 89, 61, 21, 103, 66, 11, 26, 7, 132, 220, 30, 9, 27, 20, 15, 61, 54, 12, 157, 45, 284, 46, 89, 33, 45, 17, 26, 10, 43, 64, 29, 17, 67, 24, 13, 19, 23, 66, 24, 198, 79, 11, 1027, 333, 51, 397, 32, 117, 95, 86, 137, 116, 106, 212, 26, 121, 65, 1026, 52, 14, 65, 28, 30, 60, 113, 117, 382, 45, 30, 30, 204, 112, 23, 142, 102, 45, 63, 144, 23, 157, 217, 6, 34, 101, 21, 31, 27, 122, 32, 40, 4, 293, 30, 300, 31, 7, 44, 191, 11, 4, 77, 31, 92, 188, 6, 21, 77, 61, 9, 17, 134, 301, 168, 84, 13, 117, 107, 39, 14, 7, 99, 54, 26, 71, 27, 98, 36, 85, 110, 21, 13, 51, 61, 13, 121, 62, 323, 23, 157, 45, 86, 101, 109, 18, 507, 22, 21, 11, 137, 30, 331, 174, 102, 80, 24, 142, 51, 95, 94, 158, 34, 38, 38, 60, 170, 64, 89, 17, 199, 4, 33, 56, 113, 91, 36, 74, 84, 131, 24, 56, 112, 6, 81, 64, 58, 20, 123, 241, 62, 21, 18, 35, 157, 51, 51, 81, 53, 66, 27, 8, 40, 209, 21, 11, 13, 99, 37, 44, 54, 312, 51, 15, 16, 72, 45, 25, 187, 139, 18, 13, 98, 9, 53, 23, 6, 158, 93, 12, 47, 65, 51, 54, 51, 66, 46, 11, 248, 17, 43, 17, 315, 30, 30, 120, 870, 38, 15, 174, 128, 193, 168, 256, 73, 108, 230, 108, 97, 267, 92, 91, 33, 91, 60, 38, 7, 167, 25, 351, 30, 78, 10, 15, 116, 79, 22, 95, 305, 29, 6, 118, 88, 49, 232, 86, 95, 103, 50, 164, 45, 99, 29, 40, 86, 51, 94, 334, 32, 106, 12, 108, 37, 87, 3, 39, 106, 14, 144, 25, 95, 186, 82, 14, 344, 173, 31, 95, 81, 25, 14, 672, 134, 66, 46, 18, 33, 19, 35, 13, 9, 81, 25, 31, 14, 11, 24, 194, 167, 194, 61, 49, 17, 74, 159, 21, 80, 69, 72, 69, 8, 81, 57, 12, 150, 6, 47, 95, 45, 73, 74, 13, 132, 377, 33, 186, 100, 82, 11, 24, 112, 57, 15, 225, 171, 10, 10, 26, 85, 66, 221, 57, 31, 14, 60, 19, 32, 33, 11, 6, 146, 29, 23, 35, 112, 101, 57, 30, 257, 45, 70, 78, 15, 81, 9, 106, 58, 58, 56, 12, 11, 70, 278, 91, 40, 43, 16, 91, 53, 371, 172, 29, 31, 17, 19, 13, 18]}, "baseline": {"name": "chosen", "wins": 282, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 33831, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 01:25:58.009819", "total": 512, "seed": 0, "exp_name": "archangel_sft_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 214, "lengths": [57, 66, 134, 14, 30, 92, 18, 40, 33, 331, 748, 35, 48, 37, 204, 130, 414, 30, 14, 26, 22, 181, 10, 8, 36, 16, 6, 28, 64, 134, 50, 176, 163, 394, 107, 98, 17, 29, 48, 133, 146, 148, 139, 249, 25, 19, 171, 114, 202, 17, 62, 108, 118, 55, 103, 53, 54, 199, 6, 99, 195, 38, 33, 8, 151, 117, 81, 18, 31, 94, 45, 98, 69, 477, 94, 57, 130, 105, 30, 26, 91, 8, 170, 134, 17, 100, 187, 30, 146, 26, 182, 87, 14, 18, 205, 292, 131, 57, 66, 42, 17, 11, 19, 20, 23, 261, 116, 76, 49, 61, 5, 58, 68, 46, 27, 57, 19, 49, 16, 68, 22, 170, 82, 19, 41, 9, 69, 32, 10, 28, 12, 165, 112, 19, 45, 47, 32, 149, 144, 80, 203, 17, 76, 355, 1026, 247, 87, 443, 26, 213, 215, 101, 851, 88, 49, 19, 13, 22, 364, 14, 57, 717, 83, 26, 481, 13, 41, 16, 11, 159, 75, 10, 48, 157, 70, 22, 175, 53, 421, 200, 161, 134, 11, 78, 121, 17, 18, 52, 35, 58, 13, 41, 22, 92, 107, 63, 70, 20, 108, 165, 159, 87, 919, 113, 42, 20, 32, 112, 69, 35, 39, 21, 90, 1711, 128, 72, 183, 11, 512, 5, 12, 605, 30, 171, 12, 44, 38, 14, 85, 93, 55, 196, 148, 24, 17, 98, 98, 244, 86, 43, 41, 7, 62, 9, 125, 110, 111, 141, 33, 166, 67, 42, 237, 88, 6, 228, 7, 18, 21, 265, 14, 88, 208, 49, 201, 24, 58, 143, 3, 76, 100, 50, 67, 41, 185, 56, 104, 147, 15, 62, 17, 43, 240, 12, 97, 73, 47, 6, 176, 86, 10, 13, 67, 77, 35, 147, 315, 91, 345, 37, 30, 239, 85, 522, 7, 43, 20, 91, 37, 74, 47, 18, 57, 81, 11, 30, 45, 135, 128, 54, 56, 23, 30, 99, 25, 9, 37, 81, 13, 48, 120, 279, 67, 88, 215, 1374, 241, 124, 14, 109, 73, 22, 86, 17, 135, 177, 44, 78, 287, 82, 26, 30, 353, 19, 313, 50, 100, 26, 121, 103, 101, 18, 11, 186, 42, 6, 74, 56, 75, 236, 86, 109, 105, 50, 43, 27, 60, 79, 103, 26, 52, 67, 23, 23, 51, 31, 23, 113, 56, 23, 454, 111, 150, 105, 273, 82, 207, 26, 8, 184, 17, 32, 14, 43, 88, 254, 915, 18, 235, 98, 39, 11, 38, 32, 116, 11, 160, 57, 50, 44, 98, 5, 140, 137, 97, 169, 71, 4, 37, 50, 71, 11, 17, 26, 86, 23, 17, 76, 183, 329, 12, 624, 64, 13, 158, 52, 26, 27, 198, 109, 67, 146, 145, 35, 36, 22, 91, 105, 66, 69, 60, 44, 40, 92, 33, 10, 97, 39, 36, 47, 149, 9, 38, 12, 43, 74, 21, 34, 60, 197, 304, 28, 24, 123, 20, 51, 24, 1, 18, 210, 28, 40, 111, 113, 57, 47, 40, 167, 18, 66, 8, 3, 33, 28, 31, 29, 115, 37, 122, 63, 151, 11]}, "baseline": {"name": "chosen", "wins": 265, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 48575, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "sft", "trainer": "SFTTrainer", "dataloader": "SFTDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 01:41:10.634036", "total": 512, "seed": 0, "exp_name": "archangel_dpo_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 18, "lengths": [1153, 1113, 1153, 1153, 1153, 1153, 1153, 901, 1153, 1153, 1153, 1153, 1154, 1152, 1153, 1153, 213, 1153, 1153, 1152, 1153, 524, 1153, 309, 1153, 1153, 1153, 1153, 961, 1153, 1152, 1153, 830, 830, 830, 828, 830, 830, 830, 830, 830, 830, 830, 318, 830, 830, 830, 319, 830, 830, 830, 830, 830, 830, 829, 830, 831, 830, 79, 830, 830, 830, 830, 560, 1522, 1522, 1522, 1522, 1521, 1522, 1519, 1522, 1498, 488, 1522, 1313, 1522, 1522, 1521, 1522, 1522, 1522, 1522, 1522, 797, 483, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1232, 1232, 1232, 1232, 1232, 661, 1231, 1232, 1232, 1232, 1229, 1232, 1232, 1232, 887, 1232, 513, 1232, 1232, 1232, 1232, 1230, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1234, 614, 1024, 1026, 1024, 1024, 1023, 1024, 1024, 1024, 1024, 1024, 1014, 1024, 1023, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1023, 1024, 141, 830, 830, 830, 829, 56, 830, 542, 830, 814, 830, 830, 830, 830, 59, 830, 830, 830, 830, 784, 830, 829, 830, 830, 830, 43, 16, 830, 830, 830, 830, 830, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 241, 1622, 1622, 1622, 1622, 1622, 1622, 879, 1622, 1622, 268, 1622, 1622, 1622, 1622, 1622, 1622, 1185, 1622, 1622, 1622, 1622, 1622, 1622, 1125, 1125, 978, 1125, 1125, 504, 176, 1125, 1124, 419, 1124, 1125, 193, 1125, 1125, 1125, 1125, 483, 1125, 1125, 1125, 1125, 1125, 1124, 1125, 804, 1125, 1125, 1125, 1125, 1125, 1125, 4, 463, 463, 463, 463, 463, 463, 463, 463, 459, 463, 461, 463, 463, 463, 463, 462, 463, 463, 463, 463, 463, 463, 463, 463, 186, 463, 463, 463, 463, 463, 463, 950, 1340, 1340, 1338, 1340, 1340, 1340, 811, 1340, 1338, 1340, 1340, 1340, 1340, 371, 1340, 1340, 1339, 1038, 1340, 404, 1340, 1340, 1340, 220, 1340, 1340, 1340, 1340, 1338, 623, 1340, 1434, 1434, 1434, 1434, 1434, 1434, 527, 1434, 1434, 1434, 1434, 1434, 1433, 1434, 1434, 1134, 1434, 1434, 1434, 471, 240, 1434, 61, 1434, 1434, 487, 1434, 179, 1434, 1434, 1434, 1434, 854, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 854, 855, 855, 855, 855, 745, 855, 855, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 532, 1031, 908, 1031, 1031, 1027, 1029, 1031, 1031, 1031, 1031, 1031, 1031, 1013, 1031, 1011, 1031, 1031, 1030, 1030, 1031, 1031, 1031, 808, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 654, 1289, 1289, 1289, 1289, 1289, 1289, 291, 1289, 1289, 1265, 1123, 1289, 1279, 1289, 1289, 1287, 1186, 175, 1289, 1289, 104, 1289, 1289, 1289, 1638, 1638, 1159, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1637, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1637, 1638, 457, 1638, 1638, 1638, 1638, 1638, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 186, 1102, 1102, 1101, 1102, 1102, 1102, 1101, 1053, 1100, 1102, 1102, 1102, 1101, 1102, 558, 1102, 1102, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 477, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_dpo_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 55391, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_dpo_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_dpo_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 01:56:22.480326", "total": 512, "seed": 0, "exp_name": "archangel_dpo_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 44, "lengths": [1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 352, 1176, 1153, 1153, 1152, 1150, 1153, 1153, 1152, 1153, 1153, 1153, 1153, 579, 1153, 1153, 1153, 1153, 134, 1153, 1153, 1153, 1153, 1153, 766, 830, 830, 830, 830, 830, 830, 386, 830, 830, 830, 269, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 755, 830, 373, 830, 830, 830, 830, 830, 1521, 1522, 1522, 637, 1522, 1522, 1522, 1522, 1522, 1522, 424, 1521, 1522, 1522, 1522, 1522, 1522, 1012, 1522, 1522, 1521, 1522, 299, 1522, 1522, 1522, 1522, 1522, 1522, 183, 1522, 1521, 1232, 1232, 338, 1232, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 139, 1232, 1232, 1232, 1231, 1232, 1232, 1232, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1024, 1024, 1023, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 935, 1024, 1024, 1023, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 982, 1024, 1024, 1024, 1024, 1024, 1023, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 187, 830, 830, 830, 830, 829, 830, 830, 829, 830, 832, 830, 830, 830, 830, 830, 830, 1622, 1290, 1621, 1622, 1622, 1622, 1622, 806, 1622, 1622, 1622, 630, 1622, 911, 1622, 1622, 723, 1621, 1622, 104, 1622, 1622, 1622, 1622, 1336, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1125, 1125, 343, 1125, 1125, 562, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1124, 1125, 917, 621, 1125, 1125, 1125, 1125, 1125, 1124, 1125, 586, 1125, 1124, 463, 463, 463, 463, 463, 463, 463, 454, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 462, 299, 244, 447, 802, 1340, 1187, 1340, 1340, 1340, 1340, 1340, 902, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1339, 1340, 852, 890, 128, 808, 1339, 1339, 1340, 1340, 1340, 394, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1433, 899, 1434, 1434, 908, 1106, 693, 1434, 1434, 1434, 1434, 1434, 911, 173, 1434, 1434, 745, 1434, 1434, 495, 1434, 1434, 1434, 855, 855, 855, 855, 220, 855, 330, 855, 855, 855, 855, 855, 855, 104, 854, 855, 855, 855, 855, 809, 855, 855, 855, 855, 855, 855, 172, 855, 855, 327, 855, 855, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1032, 1031, 1031, 1031, 386, 1031, 1031, 1031, 1031, 1031, 1031, 201, 1030, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1289, 1289, 1289, 173, 66, 1102, 1289, 943, 1289, 1289, 1289, 506, 1289, 1289, 1289, 1289, 1289, 1289, 110, 499, 1289, 1289, 1289, 484, 1289, 1289, 179, 1289, 1289, 1289, 1129, 1289, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1637, 1638, 1638, 1638, 1638, 1638, 1638, 837, 1638, 1638, 1638, 1638, 1638, 1638, 686, 1638, 330, 1638, 725, 1637, 1638, 1638, 1638, 1101, 1101, 1091, 1102, 565, 1102, 1102, 1102, 473, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1099, 23, 1102, 1102, 166, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 452, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_dpo_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 58169, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_dpo_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_dpo_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 02:11:46.591937", "total": 512, "seed": 0, "exp_name": "archangel_dpo_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 42, "lengths": [1153, 1153, 1153, 1153, 999, 1153, 1152, 1097, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 992, 328, 1153, 1153, 166, 1153, 1153, 1153, 1153, 1153, 736, 68, 1153, 1153, 1153, 744, 830, 830, 829, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 634, 830, 830, 830, 830, 830, 830, 242, 830, 830, 830, 830, 830, 680, 1522, 1522, 1025, 1522, 1523, 1522, 1522, 1522, 1413, 1522, 1522, 1522, 49, 1521, 1522, 1522, 550, 1522, 1522, 1522, 1522, 1522, 1522, 711, 1521, 1522, 1522, 1522, 1522, 1522, 1481, 1232, 1047, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 343, 1130, 1229, 86, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 157, 1232, 1232, 1232, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 960, 1024, 95, 1024, 752, 1024, 1025, 1024, 1020, 1024, 1024, 1024, 1024, 1024, 648, 1024, 1024, 1024, 1023, 1024, 1024, 1024, 830, 830, 830, 124, 486, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 829, 830, 830, 830, 830, 830, 830, 831, 829, 830, 830, 830, 830, 830, 830, 830, 830, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 197, 327, 1622, 1622, 1622, 1622, 1622, 1622, 1353, 606, 1620, 1619, 405, 1622, 1596, 1622, 1622, 1622, 1622, 1125, 1125, 323, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1122, 1124, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 419, 1125, 202, 1125, 4, 463, 463, 463, 463, 463, 389, 463, 463, 463, 463, 462, 463, 463, 463, 463, 463, 463, 463, 463, 463, 307, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 590, 1340, 1340, 363, 1338, 1340, 1340, 1340, 1340, 182, 1340, 1340, 1340, 1340, 1340, 917, 1340, 1340, 1340, 1340, 500, 1340, 1340, 5, 281, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1434, 1438, 1434, 1434, 1434, 1434, 1432, 1434, 1434, 1434, 373, 1432, 1434, 1434, 1434, 216, 1434, 1434, 1434, 1317, 1434, 1434, 963, 1434, 1171, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 855, 855, 855, 855, 221, 855, 855, 855, 127, 855, 855, 855, 855, 855, 855, 128, 855, 855, 506, 516, 855, 855, 855, 855, 855, 854, 855, 855, 855, 855, 855, 855, 1031, 1031, 1031, 1030, 1031, 1031, 1031, 1031, 1031, 1029, 306, 1031, 1031, 1031, 1031, 180, 1031, 1031, 1031, 1031, 1031, 1031, 359, 1031, 1031, 1031, 1031, 1031, 1031, 91, 797, 1031, 1288, 1289, 1056, 1289, 1293, 1289, 1288, 908, 1289, 320, 1289, 1289, 1289, 1289, 935, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 179, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1290, 1288, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1636, 1638, 1638, 1638, 1636, 1638, 1638, 1638, 1638, 540, 1638, 1638, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1089, 1102, 1102, 117, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1099, 1102, 624, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 449, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_dpo_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 45899, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_dpo_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_dpo_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 02:26:47.917877", "total": 512, "seed": 0, "exp_name": "archangel_dpo_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 47, "lengths": [376, 1153, 1042, 1153, 1153, 1152, 1153, 1153, 1153, 1153, 1153, 1153, 1065, 1153, 1152, 1153, 425, 1153, 1153, 1153, 1152, 1153, 1152, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 610, 830, 830, 830, 830, 830, 830, 830, 830, 829, 830, 830, 117, 822, 830, 830, 830, 830, 830, 830, 830, 172, 830, 830, 830, 830, 830, 830, 830, 777, 830, 830, 333, 1522, 1522, 1522, 392, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 997, 1522, 1522, 1522, 1522, 1522, 973, 1522, 1522, 1522, 1522, 656, 1522, 211, 1522, 1522, 1521, 1232, 797, 1232, 1232, 638, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 407, 1232, 4, 474, 1232, 644, 1232, 591, 1232, 1232, 1232, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1231, 1232, 1024, 1024, 1024, 780, 1024, 1024, 1024, 1024, 1024, 1024, 1026, 1025, 1024, 264, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 631, 1024, 1024, 1024, 44, 1024, 1024, 1024, 1024, 830, 829, 830, 830, 830, 830, 830, 837, 830, 830, 830, 830, 830, 830, 830, 829, 830, 830, 830, 830, 830, 830, 830, 830, 830, 829, 830, 625, 632, 830, 830, 830, 1622, 672, 1622, 1622, 1622, 1622, 1621, 708, 1622, 1622, 659, 1622, 1622, 1622, 1622, 1622, 1622, 385, 1622, 1538, 1622, 1622, 1622, 250, 1622, 1622, 1623, 1622, 1622, 1622, 967, 1620, 318, 1049, 1125, 1125, 302, 236, 676, 849, 1125, 1125, 1125, 1125, 1125, 1127, 1125, 1125, 1125, 1125, 1125, 382, 509, 978, 1125, 1125, 1125, 1125, 1122, 1124, 1125, 236, 69, 1125, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 263, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 139, 463, 463, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1319, 1340, 1340, 1340, 1340, 1340, 107, 1340, 1340, 375, 1340, 1340, 1340, 1339, 22, 1340, 1340, 1340, 1340, 1340, 1341, 316, 1340, 859, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1433, 1434, 1434, 1434, 187, 1434, 1434, 511, 442, 421, 1434, 1434, 1434, 1434, 392, 1434, 467, 640, 276, 1434, 868, 1434, 1434, 268, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 298, 35, 855, 855, 855, 855, 855, 855, 277, 855, 838, 556, 855, 34, 1031, 1031, 1031, 163, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1030, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 0, 1031, 1031, 1030, 1031, 1031, 1031, 1031, 787, 1030, 828, 1289, 1289, 1289, 1289, 119, 1289, 702, 1289, 1289, 1289, 166, 1288, 1289, 427, 1289, 1289, 1289, 1289, 1288, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1068, 1289, 1289, 1288, 1289, 1637, 1638, 1340, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 97, 1638, 1638, 1638, 179, 1637, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1634, 1041, 659, 1638, 1637, 447, 1638, 1102, 25, 1102, 1102, 1102, 1102, 1101, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 744, 1102, 1102, 1101, 1102, 1102, 727, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 455, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_dpo_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 50775, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_dpo_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_dpo_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 02:41:52.280967", "total": 512, "seed": 0, "exp_name": "archangel_dpo_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 158, "lengths": [1086, 703, 68, 100, 1086, 386, 1086, 42, 1086, 1086, 1086, 796, 1084, 1085, 1086, 1086, 956, 1086, 843, 1086, 385, 938, 470, 48, 91, 1086, 6, 304, 731, 1086, 301, 176, 742, 102, 742, 742, 19, 742, 651, 742, 241, 742, 742, 742, 742, 234, 742, 266, 742, 741, 742, 742, 232, 743, 742, 307, 742, 564, 58, 742, 741, 742, 742, 742, 1464, 1464, 1271, 575, 1464, 1464, 667, 1463, 1464, 1173, 1208, 1464, 398, 1464, 1464, 1055, 1463, 1464, 1464, 187, 1464, 1464, 1463, 413, 967, 737, 1464, 1464, 617, 10, 1462, 602, 605, 604, 123, 997, 340, 730, 621, 997, 996, 997, 997, 997, 997, 998, 7, 330, 997, 491, 996, 997, 997, 271, 86, 996, 997, 938, 998, 997, 669, 187, 382, 997, 1025, 1025, 1023, 902, 460, 176, 309, 1025, 1025, 759, 1025, 560, 1025, 104, 544, 1025, 367, 1025, 1023, 655, 1025, 464, 1025, 1025, 281, 1024, 158, 245, 1026, 1024, 1025, 160, 503, 814, 508, 178, 428, 813, 814, 634, 814, 489, 667, 814, 814, 814, 254, 467, 502, 814, 814, 814, 812, 814, 814, 211, 744, 39, 814, 22, 814, 814, 814, 814, 823, 349, 1590, 437, 1590, 1590, 1590, 1589, 298, 1590, 1590, 135, 952, 51, 1588, 792, 68, 427, 298, 348, 1590, 1590, 1367, 175, 1590, 650, 227, 696, 1590, 398, 1590, 1590, 1026, 819, 426, 662, 232, 1026, 150, 333, 1026, 1026, 1026, 1026, 228, 461, 948, 1025, 1026, 1026, 1026, 223, 522, 138, 622, 1026, 1026, 1026, 1025, 35, 1026, 625, 1026, 74, 12, 173, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 205, 240, 241, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 239, 240, 239, 240, 240, 240, 128, 1158, 500, 4, 1282, 1283, 876, 1283, 532, 106, 1073, 1283, 850, 800, 1283, 550, 425, 847, 1283, 500, 1283, 1282, 715, 17, 1283, 426, 1283, 1283, 1283, 1282, 201, 95, 1374, 433, 1314, 409, 1373, 1375, 676, 470, 282, 1375, 83, 423, 106, 197, 253, 542, 1375, 144, 1375, 637, 1243, 1375, 19, 1336, 1375, 170, 457, 1375, 548, 638, 1375, 50, 331, 683, 683, 683, 487, 683, 683, 283, 683, 683, 683, 437, 683, 12, 292, 59, 427, 683, 683, 683, 683, 683, 683, 682, 683, 683, 599, 683, 124, 414, 200, 9, 199, 544, 915, 903, 92, 546, 674, 915, 915, 915, 600, 915, 915, 915, 600, 915, 651, 221, 915, 915, 915, 915, 915, 465, 915, 915, 245, 914, 915, 914, 916, 10, 1220, 1220, 1015, 656, 1220, 4, 911, 1219, 1220, 1220, 1220, 106, 1220, 1220, 83, 1220, 1220, 157, 1220, 1220, 1220, 1219, 490, 181, 1220, 381, 1220, 1220, 1219, 1220, 8, 1190, 273, 254, 606, 1593, 1020, 1593, 1591, 1593, 494, 1592, 1593, 1593, 1593, 1421, 325, 1540, 1593, 781, 885, 129, 149, 138, 1593, 3, 345, 1593, 388, 217, 1593, 1593, 41, 1593, 442, 48, 555, 574, 962, 962, 535, 181, 962, 962, 962, 419, 53, 962, 514, 962, 412, 243, 371, 962, 541, 962, 934, 21, 961, 962, 568, 441, 962, 103, 5, 538]}, "baseline": {"name": "chosen", "wins": 332, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_dpo_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 59957, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_dpo_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_dpo_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 02:57:20.749276", "total": 512, "seed": 0, "exp_name": "archangel_dpo_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 213, "lengths": [1085, 1086, 53, 1086, 1086, 439, 1086, 1086, 1086, 1086, 1086, 268, 165, 657, 1086, 950, 1084, 1086, 360, 326, 871, 917, 584, 740, 651, 965, 6, 1085, 1086, 1084, 556, 635, 742, 742, 741, 742, 742, 742, 742, 742, 742, 742, 742, 742, 735, 271, 742, 393, 742, 741, 742, 142, 741, 741, 488, 742, 596, 741, 78, 742, 741, 742, 742, 58, 1464, 22, 1464, 469, 1464, 1464, 1464, 1464, 198, 1027, 1464, 1464, 1149, 1464, 1281, 1464, 1463, 429, 415, 1464, 1464, 108, 1463, 1337, 1463, 414, 1464, 1464, 1464, 1464, 1464, 1464, 686, 997, 518, 997, 305, 997, 997, 997, 997, 943, 997, 997, 372, 997, 50, 997, 997, 997, 461, 997, 997, 997, 997, 997, 895, 997, 997, 997, 997, 997, 996, 997, 1024, 1025, 643, 1024, 1026, 1025, 1025, 563, 1025, 1025, 1025, 825, 346, 1025, 198, 1025, 1026, 1025, 569, 564, 650, 1025, 1025, 1025, 1025, 472, 1025, 1025, 1025, 1024, 1025, 143, 814, 814, 814, 813, 813, 813, 276, 471, 814, 738, 783, 814, 814, 814, 814, 814, 814, 814, 814, 814, 449, 814, 814, 234, 814, 814, 814, 22, 814, 813, 813, 814, 325, 471, 1590, 1590, 720, 1590, 431, 576, 475, 1590, 1590, 540, 1590, 704, 1590, 1590, 59, 475, 61, 1590, 1009, 1590, 1590, 78, 1590, 1590, 1590, 1591, 1590, 1590, 1590, 1590, 1026, 1026, 1026, 1026, 1026, 1026, 185, 984, 323, 1026, 428, 152, 1026, 1026, 212, 1026, 1027, 1026, 1026, 1025, 1025, 1026, 1026, 1025, 1026, 1027, 1026, 112, 183, 1026, 3, 676, 240, 240, 239, 240, 123, 240, 240, 240, 240, 240, 240, 240, 240, 239, 240, 240, 240, 240, 240, 240, 240, 170, 240, 240, 240, 240, 239, 236, 240, 240, 240, 163, 1283, 1283, 108, 884, 229, 1283, 1283, 1283, 1283, 1283, 1283, 357, 1283, 1283, 1283, 251, 1284, 1282, 723, 1282, 783, 1283, 1283, 32, 120, 70, 1283, 1283, 1282, 1282, 461, 1283, 103, 477, 1375, 450, 1375, 1375, 465, 1375, 883, 766, 1374, 731, 1375, 1375, 1076, 1100, 1375, 530, 1100, 1375, 1375, 1375, 1375, 970, 351, 1155, 235, 1375, 1375, 1375, 1375, 1375, 683, 683, 683, 77, 570, 683, 683, 234, 683, 683, 683, 301, 683, 131, 683, 618, 451, 683, 211, 683, 640, 683, 683, 99, 683, 683, 683, 683, 683, 682, 683, 176, 203, 915, 915, 915, 915, 915, 915, 916, 290, 176, 894, 416, 915, 914, 915, 915, 914, 914, 914, 731, 915, 915, 452, 915, 915, 915, 915, 914, 915, 635, 915, 915, 1219, 1220, 509, 1220, 180, 100, 811, 187, 1220, 1220, 1220, 146, 1220, 759, 19, 348, 1220, 1220, 313, 1220, 413, 1220, 1220, 190, 1220, 838, 487, 1220, 1220, 1220, 825, 374, 576, 1593, 156, 1593, 191, 1593, 95, 1007, 1592, 1593, 1593, 1523, 780, 1593, 282, 695, 959, 1593, 1593, 193, 1593, 1592, 1142, 1592, 1593, 1593, 526, 179, 605, 1593, 915, 1593, 906, 945, 962, 962, 962, 962, 176, 594, 962, 304, 962, 962, 961, 962, 962, 115, 962, 658, 962, 664, 875, 962, 962, 33, 701, 962, 962, 962, 962, 103, 60, 823]}, "baseline": {"name": "chosen", "wins": 279, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_dpo_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 50987, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_dpo_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_dpo_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 03:12:25.054140", "total": 512, "seed": 0, "exp_name": "archangel_dpo_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 274, "lengths": [479, 906, 206, 1187, 306, 169, 306, 82, 1460, 686, 144, 381, 29, 1460, 161, 255, 578, 719, 1086, 1086, 132, 412, 1086, 35, 93, 615, 433, 1086, 224, 148, 1086, 265, 1100, 648, 634, 130, 248, 516, 394, 117, 166, 1404, 1403, 247, 1403, 29, 662, 579, 455, 168, 742, 164, 275, 258, 224, 9, 321, 679, 11, 415, 190, 155, 742, 269, 189, 158, 1249, 706, 1511, 78, 489, 1511, 366, 1511, 1511, 848, 136, 183, 351, 1511, 1461, 133, 1464, 1464, 1464, 322, 192, 546, 1464, 287, 415, 147, 16, 4, 1464, 654, 212, 997, 11, 997, 997, 585, 997, 119, 229, 559, 496, 538, 32, 997, 242, 686, 1485, 928, 68, 131, 1485, 378, 520, 644, 50, 198, 1485, 1485, 360, 301, 543, 1375, 151, 200, 1574, 200, 991, 341, 247, 567, 1574, 434, 666, 444, 95, 362, 1574, 1495, 1026, 337, 550, 1025, 516, 336, 1025, 1025, 368, 278, 1024, 6, 226, 341, 973, 154, 639, 814, 649, 550, 330, 40, 189, 324, 11, 814, 396, 257, 259, 379, 340, 82, 1455, 180, 1156, 561, 364, 181, 194, 1455, 638, 15, 4, 365, 447, 547, 1454, 6, 348, 643, 287, 15, 518, 526, 552, 122, 162, 196, 599, 237, 529, 6, 1445, 93, 451, 290, 176, 750, 345, 134, 261, 87, 481, 311, 355, 685, 589, 238, 681, 492, 1026, 477, 168, 1026, 110, 105, 164, 992, 561, 271, 420, 135, 564, 384, 427, 1025, 201, 1289, 1289, 1169, 461, 318, 1098, 110, 487, 1289, 1288, 146, 264, 562, 7, 298, 13, 896, 96, 231, 1358, 165, 173, 1358, 388, 244, 153, 235, 12, 1356, 242, 51, 137, 168, 220, 240, 240, 240, 240, 240, 240, 89, 240, 240, 240, 240, 240, 240, 126, 278, 739, 9, 366, 1382, 943, 141, 197, 178, 332, 1157, 1525, 585, 352, 1526, 1283, 51, 193, 555, 1002, 140, 39, 20, 47, 101, 1283, 357, 1282, 638, 161, 174, 88, 247, 1374, 527, 1375, 365, 170, 604, 1303, 1375, 138, 537, 1375, 52, 1054, 652, 424, 97, 1602, 208, 53, 303, 1602, 227, 959, 261, 153, 323, 609, 79, 268, 50, 683, 358, 681, 683, 683, 683, 69, 333, 219, 682, 368, 169, 37, 12, 683, 377, 756, 744, 1444, 1443, 586, 124, 660, 45, 263, 1130, 1444, 731, 218, 145, 996, 17, 810, 41, 265, 268, 123, 128, 293, 114, 278, 270, 1316, 110, 1316, 275, 154, 1315, 34, 911, 915, 747, 122, 915, 916, 695, 425, 915, 914, 779, 121, 80, 914, 666, 289, 317, 212, 34, 6, 13, 1152, 402, 367, 1090, 1220, 14, 981, 540, 6, 624, 646, 1362, 439, 66, 201, 430, 439, 1362, 1363, 586, 471, 24, 447, 1363, 4, 332, 323, 200, 153, 363, 207, 480, 27, 43, 205, 155, 288, 1606, 89, 770, 40, 1606, 245, 1592, 231, 178, 1593, 24, 94, 81, 66, 618, 13, 125, 140, 249, 61, 973, 312, 154, 141, 193, 1528, 1327, 143, 51, 1528, 160, 221, 1169, 732, 382, 1155, 365, 174, 416, 711, 369, 962, 207, 137, 265, 962, 445, 314, 578, 359, 110, 13, 280]}, "baseline": {"name": "chosen", "wins": 215, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_dpo_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 40835, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_dpo_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_dpo_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 16, "gradient_accumulation_steps": 2, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 03:27:51.867837", "total": 512, "seed": 0, "exp_name": "archangel_kto_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 18, "lengths": [1153, 1153, 170, 1153, 236, 1153, 1153, 1153, 1153, 1153, 1152, 1152, 1153, 1153, 1153, 1153, 129, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 946, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 830, 830, 830, 808, 830, 820, 830, 830, 830, 830, 830, 830, 831, 830, 830, 830, 830, 830, 830, 342, 830, 830, 830, 830, 830, 830, 830, 829, 830, 830, 830, 829, 467, 772, 1521, 1403, 1522, 1522, 1522, 1522, 1520, 1522, 1520, 1522, 298, 1521, 1521, 1522, 1522, 238, 1522, 1522, 199, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 996, 1477, 1522, 1522, 1232, 1232, 1231, 1232, 1232, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 337, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1231, 1227, 1232, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1005, 1024, 772, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1023, 1024, 1024, 1025, 1024, 1024, 1024, 1024, 830, 830, 830, 641, 734, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 533, 830, 830, 813, 1622, 1622, 1622, 1087, 1620, 1622, 1622, 1622, 1274, 1621, 1622, 1622, 1622, 1621, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1619, 1622, 1174, 1622, 1622, 1622, 1622, 1623, 1622, 1125, 1125, 1124, 1125, 1125, 1125, 1125, 1125, 1125, 1123, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1124, 301, 1125, 1125, 1011, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 4, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 276, 463, 76, 1340, 1340, 1340, 1339, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1324, 1340, 1340, 1340, 871, 752, 1339, 1340, 1340, 1340, 1340, 1339, 1340, 1070, 1434, 1434, 1434, 1434, 1434, 1434, 1435, 1245, 1434, 222, 1434, 1434, 1090, 1434, 1433, 609, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1433, 1434, 855, 855, 855, 855, 638, 855, 855, 855, 855, 855, 855, 298, 855, 855, 855, 855, 855, 855, 854, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 1031, 1031, 1031, 1031, 129, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 677, 1031, 1031, 1031, 1031, 1030, 1289, 1289, 1289, 1289, 863, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1291, 1289, 1286, 1289, 1289, 204, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 955, 1289, 1289, 1289, 117, 1289, 1638, 1638, 547, 1638, 664, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1102, 1102, 1102, 1102, 1102, 1100, 1102, 574, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 766, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 478, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_kto_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 44277, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_kto_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_kto_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 03:43:28.947081", "total": 512, "seed": 0, "exp_name": "archangel_kto_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 36, "lengths": [1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 141, 1153, 1153, 1153, 1153, 1153, 1153, 1152, 1153, 142, 1153, 1153, 1153, 1153, 1153, 1153, 820, 1153, 1153, 1153, 830, 830, 830, 830, 830, 830, 829, 830, 830, 830, 830, 830, 830, 830, 832, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 1079, 1520, 1522, 1522, 1522, 1522, 1522, 1522, 1521, 1522, 1522, 1522, 1522, 1521, 1521, 1522, 1522, 1522, 470, 1522, 1522, 1522, 1522, 1522, 952, 1522, 1520, 1521, 1522, 1419, 1522, 1522, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1231, 1232, 1232, 1232, 69, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1024, 1018, 176, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 134, 43, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1013, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 829, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 560, 1622, 1622, 1622, 456, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 850, 1622, 275, 1622, 1622, 1366, 1622, 1622, 1622, 1622, 1622, 1091, 1551, 1622, 1622, 1622, 1622, 1622, 857, 1622, 869, 1125, 1124, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1124, 1125, 1125, 1125, 1125, 1125, 1125, 757, 645, 1125, 1125, 1125, 1125, 1125, 304, 1125, 462, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 451, 242, 463, 463, 463, 463, 463, 463, 463, 463, 463, 1340, 1340, 1339, 151, 1340, 1340, 1340, 1341, 1340, 1340, 1340, 1340, 1340, 132, 1340, 1340, 1339, 181, 87, 402, 1032, 1340, 1340, 820, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1009, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1433, 390, 1434, 1434, 1302, 1434, 1434, 1434, 1433, 1434, 1055, 1434, 447, 1434, 1434, 1434, 1431, 1434, 1434, 1434, 1434, 1434, 1434, 855, 855, 855, 855, 490, 855, 855, 855, 855, 855, 855, 588, 855, 105, 855, 39, 855, 855, 854, 855, 855, 855, 855, 276, 855, 854, 855, 855, 855, 855, 854, 855, 1031, 557, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1030, 1031, 1031, 1031, 802, 734, 810, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 468, 1289, 1289, 1289, 1289, 66, 1289, 1289, 1289, 1289, 1289, 1289, 459, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 106, 1287, 1289, 1289, 1286, 501, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1637, 1154, 1638, 1638, 1637, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 651, 1638, 1638, 1638, 1638, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 326, 1102, 1104, 1102, 471, 1102, 1102, 1101, 1102, 1102, 635, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 457, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_kto_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 40479, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_kto_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_kto_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 03:58:45.190018", "total": 512, "seed": 0, "exp_name": "archangel_kto_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 48, "lengths": [1153, 1153, 1153, 1144, 119, 1153, 504, 1153, 1153, 1153, 1153, 1153, 569, 1153, 1153, 1153, 249, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 104, 1153, 1153, 1153, 1151, 1153, 830, 159, 830, 829, 830, 830, 829, 830, 830, 830, 830, 830, 830, 692, 830, 830, 830, 830, 830, 829, 830, 296, 830, 830, 831, 830, 615, 830, 830, 830, 830, 329, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1080, 237, 1522, 1522, 1522, 1522, 1522, 1522, 627, 1522, 1522, 1063, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1232, 1232, 1232, 323, 414, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 361, 1232, 348, 1232, 1232, 972, 1232, 1232, 1232, 1232, 830, 1232, 1232, 1232, 1232, 1232, 490, 1232, 1232, 441, 1024, 1024, 1024, 1024, 1023, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1023, 1022, 1024, 1024, 1024, 482, 1024, 1024, 1025, 1024, 1024, 1024, 830, 829, 829, 830, 830, 830, 830, 830, 830, 829, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 829, 827, 830, 830, 830, 830, 830, 830, 43, 1622, 1622, 793, 1622, 1622, 1622, 749, 1622, 1622, 1622, 1622, 1622, 1617, 1622, 1622, 1621, 1622, 1621, 1424, 1622, 1622, 1354, 1622, 1622, 1622, 696, 1622, 1622, 1622, 1221, 1622, 1622, 1125, 1125, 1125, 1125, 241, 1125, 1125, 794, 1125, 1124, 1125, 1125, 1125, 1125, 1124, 1125, 1125, 614, 1125, 1124, 1029, 1125, 1125, 794, 1125, 1125, 1125, 506, 1125, 1125, 1125, 1125, 266, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 462, 463, 254, 463, 463, 463, 463, 463, 463, 463, 463, 239, 463, 216, 1340, 1340, 1340, 1340, 371, 1340, 1340, 1174, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1339, 1340, 1340, 1340, 1339, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1339, 1340, 1340, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 289, 1434, 117, 1434, 1434, 1434, 472, 1433, 1434, 117, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 854, 855, 854, 855, 855, 855, 855, 855, 855, 853, 855, 855, 855, 855, 855, 855, 855, 855, 491, 854, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 232, 1031, 1031, 1031, 557, 1031, 1031, 1031, 1031, 720, 1031, 1031, 1031, 1031, 1031, 1029, 34, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1289, 1289, 1195, 1289, 1289, 1289, 1289, 1289, 1288, 1289, 1289, 1289, 1289, 1289, 1284, 1289, 1289, 1289, 287, 1289, 1289, 1289, 1289, 1288, 1289, 1289, 1289, 1289, 1289, 1283, 1289, 1288, 80, 920, 1638, 1638, 1638, 1638, 969, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 387, 1102, 1102, 1102, 1102, 463, 292, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 265, 1102, 1102, 1102, 850, 1002, 1102]}, "baseline": {"name": "chosen", "wins": 451, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_kto_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 44777, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_kto_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_kto_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 04:14:18.135869", "total": 512, "seed": 0, "exp_name": "archangel_kto_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 41, "lengths": [1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 178, 1015, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 830, 830, 830, 830, 811, 830, 830, 830, 830, 830, 830, 370, 830, 830, 830, 830, 830, 830, 830, 273, 830, 830, 830, 831, 830, 830, 830, 830, 830, 830, 830, 830, 1522, 1522, 506, 1202, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1521, 1522, 1522, 814, 102, 1521, 113, 1522, 1521, 1521, 1521, 1522, 1521, 1522, 1522, 1522, 1522, 1232, 1232, 1232, 1232, 1232, 1232, 79, 1232, 1232, 1232, 1232, 1232, 210, 1232, 4, 1232, 1232, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 780, 1024, 1024, 1024, 222, 1024, 1024, 1024, 1024, 1024, 1020, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 331, 1023, 1024, 1024, 1024, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 825, 830, 830, 830, 828, 433, 830, 830, 830, 532, 830, 830, 822, 1622, 1563, 1622, 334, 1622, 1622, 1622, 1291, 1622, 1622, 1622, 1622, 1622, 91, 1622, 1622, 956, 1454, 772, 1621, 1622, 1622, 1622, 1621, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 998, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 860, 1125, 1125, 1124, 1125, 1125, 1125, 320, 1125, 1125, 1125, 1123, 1125, 217, 1125, 1125, 1125, 779, 1125, 244, 381, 1125, 863, 372, 36, 463, 463, 463, 463, 463, 272, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 423, 463, 463, 106, 463, 463, 1340, 1043, 1340, 1340, 1340, 1340, 1340, 465, 1340, 752, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 600, 1340, 1338, 446, 22, 818, 1340, 1340, 1340, 1340, 1339, 1340, 1340, 1434, 1434, 1434, 1434, 1434, 1434, 672, 1434, 986, 1434, 1434, 1434, 1434, 1228, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 549, 1434, 1085, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 483, 855, 658, 855, 447, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 817, 855, 854, 855, 855, 855, 855, 855, 855, 855, 855, 855, 1031, 1030, 1031, 1030, 763, 972, 1031, 1031, 1031, 663, 1031, 1031, 1031, 1031, 1031, 1029, 42, 1031, 1031, 1030, 1031, 1029, 0, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 433, 1031, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 446, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1115, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1638, 1638, 1638, 1638, 472, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 467, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1102, 1102, 1102, 1102, 1102, 1102, 218, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1101, 1102, 1102, 356, 1102, 1102, 35, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 459, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_kto_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 42763, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_kto_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_kto_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 04:28:03.286070", "total": 512, "seed": 0, "exp_name": "archangel_kto_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 210, "lengths": [1085, 588, 191, 123, 223, 308, 1076, 37, 717, 180, 186, 323, 43, 696, 356, 25, 34, 367, 179, 107, 263, 478, 432, 451, 95, 100, 6, 614, 60, 116, 112, 371, 742, 55, 132, 120, 152, 259, 272, 135, 187, 265, 96, 742, 689, 41, 132, 96, 129, 104, 742, 129, 117, 133, 498, 742, 742, 336, 6, 189, 102, 406, 299, 213, 207, 105, 78, 543, 596, 92, 1464, 83, 517, 212, 579, 85, 346, 92, 175, 159, 100, 135, 133, 226, 6, 99, 86, 145, 419, 103, 244, 121, 56, 959, 179, 435, 115, 131, 124, 997, 200, 107, 175, 519, 996, 191, 734, 180, 125, 318, 6, 71, 153, 310, 150, 997, 166, 93, 86, 124, 75, 97, 614, 92, 564, 196, 485, 232, 545, 1024, 304, 717, 564, 175, 149, 299, 1025, 125, 480, 513, 175, 54, 119, 172, 378, 193, 147, 305, 166, 64, 93, 882, 258, 394, 100, 6, 116, 93, 292, 122, 186, 266, 184, 217, 813, 227, 54, 219, 144, 365, 298, 34, 430, 264, 404, 367, 76, 39, 185, 317, 334, 75, 184, 797, 813, 7, 11, 22, 123, 300, 518, 7, 27, 213, 297, 118, 495, 47, 604, 255, 150, 299, 129, 193, 293, 40, 123, 143, 107, 59, 73, 104, 182, 119, 332, 55, 7, 56, 181, 206, 1107, 1590, 346, 527, 1025, 94, 65, 17, 106, 89, 61, 298, 463, 7, 110, 575, 379, 347, 240, 850, 273, 95, 194, 125, 659, 133, 874, 44, 170, 854, 195, 275, 153, 475, 7, 428, 6, 240, 152, 194, 240, 141, 240, 204, 240, 240, 182, 219, 6, 118, 145, 219, 189, 107, 240, 240, 199, 148, 135, 239, 240, 102, 82, 235, 154, 154, 240, 239, 167, 401, 161, 4, 194, 98, 241, 102, 920, 198, 305, 119, 295, 625, 138, 519, 349, 11, 31, 308, 114, 523, 576, 6, 133, 63, 1283, 1283, 1283, 114, 131, 141, 55, 125, 81, 157, 189, 242, 425, 470, 81, 243, 78, 569, 209, 27, 152, 129, 269, 53, 79, 451, 71, 158, 73, 110, 115, 277, 77, 249, 797, 258, 975, 20, 515, 134, 388, 75, 116, 87, 144, 63, 114, 247, 410, 112, 50, 4, 168, 41, 153, 244, 277, 683, 549, 39, 77, 70, 534, 195, 132, 289, 216, 682, 256, 64, 115, 111, 915, 451, 151, 64, 578, 124, 309, 156, 574, 315, 915, 141, 137, 144, 80, 151, 371, 249, 113, 915, 915, 158, 915, 155, 241, 162, 501, 86, 282, 165, 447, 71, 77, 177, 6, 58, 245, 208, 610, 152, 1220, 21, 567, 585, 374, 193, 618, 106, 82, 141, 142, 1219, 230, 418, 1220, 530, 147, 50, 141, 162, 6, 108, 565, 161, 144, 156, 153, 1592, 578, 70, 717, 171, 667, 507, 523, 303, 82, 143, 318, 96, 374, 125, 235, 65, 108, 6, 281, 427, 241, 90, 451, 127, 1593, 666, 221, 95, 272, 192, 346, 500, 962, 170, 236, 310, 234, 419, 142, 160, 342, 961, 184, 159, 118, 437, 615, 56, 274, 24, 418, 962, 177, 126, 193, 53, 4, 80]}, "baseline": {"name": "chosen", "wins": 268, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_kto_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 43235, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_kto_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_kto_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 04:41:52.614840", "total": 512, "seed": 0, "exp_name": "archangel_kto_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 240, "lengths": [1085, 150, 208, 553, 723, 157, 126, 329, 410, 227, 773, 254, 202, 244, 776, 49, 12, 1086, 224, 239, 77, 36, 233, 180, 187, 215, 6, 129, 45, 357, 110, 170, 322, 153, 103, 106, 442, 742, 118, 191, 330, 742, 244, 742, 160, 89, 278, 106, 424, 91, 145, 592, 107, 259, 380, 178, 742, 715, 6, 235, 113, 549, 580, 147, 130, 186, 393, 510, 489, 68, 221, 193, 820, 326, 436, 1464, 584, 116, 248, 1464, 1318, 1367, 61, 1464, 104, 113, 203, 371, 408, 115, 330, 176, 1464, 1464, 128, 424, 133, 108, 11, 104, 929, 997, 172, 997, 416, 376, 306, 187, 78, 232, 5, 90, 193, 183, 197, 524, 997, 99, 373, 289, 118, 192, 243, 231, 323, 165, 997, 997, 1024, 274, 268, 370, 259, 151, 186, 259, 1025, 182, 239, 400, 137, 81, 180, 184, 1026, 302, 158, 507, 140, 37, 235, 103, 364, 770, 83, 14, 135, 518, 1025, 107, 811, 339, 383, 13, 189, 165, 309, 233, 814, 254, 87, 278, 368, 222, 755, 319, 375, 814, 374, 184, 534, 219, 302, 337, 808, 152, 814, 22, 341, 419, 458, 6, 235, 130, 183, 13, 500, 377, 665, 621, 119, 239, 123, 267, 177, 12, 813, 189, 172, 101, 102, 140, 112, 206, 715, 77, 749, 158, 168, 363, 1590, 412, 224, 200, 66, 186, 107, 1026, 73, 115, 106, 251, 1027, 320, 200, 319, 421, 746, 363, 88, 224, 340, 369, 118, 127, 81, 685, 832, 128, 290, 128, 67, 688, 128, 3, 113, 4, 226, 96, 240, 241, 240, 136, 144, 240, 240, 189, 159, 240, 114, 240, 68, 150, 240, 240, 239, 56, 178, 240, 222, 184, 167, 239, 239, 202, 129, 240, 239, 112, 379, 186, 25, 876, 111, 1283, 446, 1283, 69, 307, 92, 141, 690, 1283, 178, 255, 16, 1283, 1282, 640, 198, 299, 16, 160, 115, 187, 1283, 322, 201, 222, 421, 113, 104, 217, 382, 1375, 276, 695, 281, 123, 124, 95, 266, 116, 62, 791, 1374, 662, 222, 235, 175, 78, 514, 187, 123, 90, 188, 154, 1375, 406, 1363, 1375, 597, 283, 683, 332, 60, 170, 515, 76, 134, 117, 108, 217, 682, 543, 4, 113, 41, 170, 259, 148, 94, 236, 67, 683, 683, 404, 261, 39, 202, 375, 73, 683, 9, 65, 46, 150, 197, 155, 108, 324, 70, 126, 864, 226, 560, 153, 276, 67, 256, 140, 530, 251, 199, 215, 915, 375, 197, 336, 419, 411, 176, 915, 134, 424, 63, 187, 676, 74, 175, 6, 71, 816, 279, 742, 173, 285, 12, 314, 155, 19, 222, 215, 135, 640, 511, 428, 239, 326, 133, 416, 588, 201, 40, 225, 127, 6, 331, 369, 244, 246, 767, 117, 1592, 195, 135, 286, 262, 104, 1593, 253, 383, 89, 122, 284, 80, 191, 278, 288, 113, 102, 308, 202, 411, 20, 90, 594, 131, 81, 1593, 716, 77, 73, 342, 81, 629, 621, 962, 220, 684, 962, 376, 89, 195, 369, 220, 305, 178, 92, 247, 962, 488, 94, 44, 668, 233, 275, 633, 196, 60, 7, 109]}, "baseline": {"name": "chosen", "wins": 249, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_kto_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 60359, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_kto_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_kto_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 04:55:07.931994", "total": 512, "seed": 0, "exp_name": "archangel_kto_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 340, "lengths": [78, 138, 169, 46, 319, 53, 132, 63, 1103, 137, 74, 201, 54, 69, 120, 58, 464, 112, 66, 83, 135, 173, 122, 25, 77, 183, 6, 404, 73, 166, 104, 256, 515, 321, 70, 176, 23, 162, 50, 128, 173, 164, 90, 65, 160, 65, 172, 118, 245, 54, 117, 166, 83, 232, 206, 155, 742, 621, 6, 128, 129, 51, 164, 12, 101, 59, 93, 216, 289, 85, 59, 35, 517, 616, 211, 77, 213, 84, 70, 276, 57, 121, 103, 430, 11, 198, 87, 196, 179, 65, 83, 231, 21, 15, 102, 784, 143, 117, 12, 47, 48, 23, 89, 751, 226, 149, 374, 104, 146, 87, 16, 38, 117, 230, 38, 115, 102, 143, 72, 97, 68, 156, 707, 129, 468, 127, 510, 167, 82, 879, 196, 67, 356, 597, 155, 72, 328, 82, 301, 200, 135, 44, 195, 310, 1026, 387, 264, 161, 224, 177, 113, 111, 284, 196, 42, 7, 115, 131, 349, 64, 75, 163, 224, 356, 145, 441, 135, 254, 23, 332, 67, 47, 105, 293, 74, 45, 116, 68, 405, 300, 115, 48, 40, 420, 366, 31, 6, 147, 95, 186, 79, 51, 35, 69, 263, 17, 98, 46, 264, 108, 75, 59, 300, 83, 747, 13, 125, 237, 213, 124, 50, 60, 137, 48, 237, 45, 121, 25, 217, 221, 100, 269, 84, 349, 99, 163, 37, 35, 44, 114, 169, 1001, 143, 37, 88, 184, 118, 356, 165, 102, 73, 24, 118, 777, 52, 111, 529, 72, 99, 203, 260, 75, 253, 163, 6, 165, 6, 684, 26, 171, 17, 498, 152, 80, 205, 26, 114, 126, 14, 81, 148, 150, 83, 117, 240, 82, 93, 230, 77, 55, 240, 240, 146, 122, 157, 240, 126, 39, 96, 317, 79, 6, 102, 77, 21, 206, 808, 108, 201, 358, 139, 333, 142, 178, 348, 33, 31, 123, 238, 128, 63, 7, 246, 70, 299, 89, 99, 104, 240, 293, 44, 52, 50, 170, 49, 30, 282, 297, 63, 46, 88, 577, 256, 174, 95, 1375, 243, 199, 111, 496, 101, 31, 126, 105, 484, 174, 82, 216, 135, 58, 147, 81, 520, 406, 683, 208, 162, 183, 98, 79, 134, 87, 171, 682, 40, 6, 165, 72, 259, 284, 607, 82, 330, 59, 315, 87, 393, 187, 70, 151, 282, 135, 305, 33, 90, 77, 30, 172, 51, 128, 831, 135, 210, 258, 178, 160, 88, 101, 118, 146, 57, 42, 196, 271, 111, 217, 915, 138, 297, 148, 36, 915, 37, 142, 196, 49, 1205, 68, 70, 39, 6, 37, 449, 199, 337, 331, 1220, 20, 227, 81, 222, 196, 97, 31, 70, 30, 130, 593, 419, 93, 10, 246, 163, 45, 84, 81, 6, 39, 293, 228, 305, 146, 126, 34, 88, 66, 180, 102, 176, 296, 142, 109, 103, 132, 83, 47, 149, 83, 125, 73, 90, 13, 90, 45, 19, 108, 53, 1593, 129, 85, 267, 40, 998, 371, 63, 118, 420, 41, 38, 67, 82, 143, 52, 208, 239, 87, 80, 277, 107, 351, 246, 30, 60, 111, 258, 145, 205, 248, 79, 53, 7, 281]}, "baseline": {"name": "chosen", "wins": 148, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_kto_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 35795, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_kto_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_kto_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 16, "gradient_accumulation_steps": 2, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 05:09:55.892303", "total": 512, "seed": 0, "exp_name": "archangel_ppo_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 20, "lengths": [1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 347, 1154, 1153, 1153, 1153, 148, 1153, 1153, 1153, 1153, 862, 1153, 197, 1153, 1153, 50, 487, 1153, 1153, 1153, 1153, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 829, 830, 830, 830, 830, 830, 830, 830, 830, 572, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 1521, 1522, 1522, 1521, 1522, 1513, 1522, 1522, 1522, 680, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 639, 545, 1522, 1374, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 941, 1232, 1232, 1215, 514, 1232, 1232, 1056, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1231, 425, 1232, 1232, 1232, 1232, 1232, 1231, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1231, 1232, 1232, 1024, 313, 1024, 1024, 1024, 1024, 1004, 1024, 1024, 1024, 1024, 1024, 751, 1024, 1024, 1014, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 478, 1024, 1026, 1024, 1024, 1024, 1024, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 505, 830, 830, 99, 830, 830, 830, 830, 830, 48, 830, 830, 830, 830, 8, 1622, 1622, 346, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1621, 632, 1622, 1622, 1622, 1622, 1622, 1622, 624, 1622, 554, 239, 1561, 1621, 557, 116, 1622, 1622, 1622, 1622, 1621, 1490, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1124, 1124, 1125, 1125, 501, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1124, 1125, 1125, 1124, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 457, 4, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 454, 463, 463, 463, 463, 463, 463, 246, 463, 463, 1340, 1340, 1340, 1340, 1340, 1331, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 779, 973, 1340, 1340, 1340, 1340, 286, 468, 1340, 1338, 1340, 1340, 1340, 913, 1340, 1434, 1432, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 222, 1434, 1434, 1007, 1434, 1434, 1434, 638, 1434, 779, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 288, 1434, 1434, 1434, 855, 855, 853, 855, 855, 855, 95, 855, 855, 855, 855, 211, 855, 855, 855, 855, 855, 852, 855, 855, 855, 855, 855, 855, 116, 855, 855, 855, 855, 451, 855, 855, 1031, 461, 1031, 1031, 1030, 1031, 1031, 472, 1031, 863, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1030, 1031, 1031, 1031, 1031, 1031, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1049, 1289, 1289, 1289, 444, 1290, 1289, 1289, 1289, 1289, 990, 1289, 1289, 1289, 951, 1289, 1289, 1289, 1289, 1283, 721, 1289, 1289, 347, 1289, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1637, 448, 1638, 1638, 1636, 1638, 1638, 1638, 992, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 93, 1638, 1638, 1638, 1052, 1638, 1101, 1102, 1102, 1099, 1102, 1101, 1102, 789, 1102, 463, 1102, 582, 1102, 1102, 1102, 1102, 1102, 1101, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 761, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 481, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_ppo_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 54309, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_ppo_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_ppo_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 05:25:36.905099", "total": 512, "seed": 0, "exp_name": "archangel_ppo_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 38, "lengths": [1153, 1153, 1153, 1153, 1153, 1153, 1153, 525, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1152, 1153, 1152, 1153, 1153, 1153, 285, 1153, 1153, 156, 1153, 1153, 1153, 223, 1153, 1153, 1153, 576, 830, 341, 830, 830, 830, 830, 386, 830, 830, 830, 830, 199, 402, 830, 344, 830, 830, 830, 358, 829, 830, 545, 830, 830, 830, 830, 830, 306, 830, 830, 830, 1522, 1522, 1522, 309, 1522, 1522, 1522, 1522, 1522, 1508, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 471, 1163, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 1522, 764, 328, 1522, 1522, 1232, 1232, 95, 1232, 354, 1232, 1232, 1232, 1232, 1232, 1232, 220, 1232, 1232, 151, 1232, 1232, 1232, 384, 1232, 1232, 1232, 1232, 1232, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1024, 1024, 390, 749, 1024, 1024, 742, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 525, 1026, 1024, 523, 602, 1024, 1024, 1024, 1024, 1026, 1024, 830, 830, 830, 263, 830, 830, 830, 596, 830, 829, 830, 830, 830, 740, 830, 830, 830, 222, 830, 830, 830, 830, 404, 829, 830, 826, 791, 830, 830, 485, 830, 830, 1379, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1621, 1622, 1622, 1622, 1182, 389, 1622, 361, 792, 1622, 570, 764, 1622, 1621, 1622, 1622, 1622, 1596, 1622, 1622, 1622, 1622, 1622, 593, 511, 1125, 958, 375, 1125, 581, 424, 1125, 1125, 1125, 347, 1125, 1124, 1125, 1125, 1125, 1125, 1125, 870, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 346, 1125, 693, 1125, 1125, 4, 463, 463, 463, 463, 441, 463, 457, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 112, 169, 463, 463, 415, 463, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1219, 1334, 1340, 1340, 1340, 984, 1340, 1340, 1340, 1334, 245, 1340, 1078, 1340, 1340, 945, 1340, 863, 1340, 1340, 1340, 180, 1340, 1340, 1340, 1151, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 304, 1434, 1071, 1434, 1053, 1233, 1434, 1434, 1434, 504, 1434, 1434, 622, 1434, 1431, 1420, 1434, 1434, 1434, 1434, 1434, 399, 855, 855, 855, 203, 855, 855, 73, 855, 855, 853, 855, 855, 261, 855, 855, 855, 855, 855, 855, 855, 855, 450, 531, 855, 855, 855, 855, 852, 854, 855, 855, 855, 1031, 1031, 1031, 1031, 1028, 1031, 1031, 574, 1031, 1030, 1031, 1031, 1031, 1031, 989, 1031, 1031, 1031, 1032, 1031, 191, 1012, 243, 1031, 880, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1289, 619, 101, 1289, 11, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 567, 1222, 1289, 1289, 1271, 110, 1203, 1289, 672, 1290, 1289, 1289, 1247, 1286, 176, 1173, 626, 1289, 1289, 414, 820, 1638, 1638, 1638, 1636, 1082, 401, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 284, 1638, 1638, 1637, 1638, 1638, 1638, 1638, 1638, 1564, 1637, 252, 1466, 1638, 1637, 1638, 1102, 332, 400, 1102, 1102, 1102, 802, 1102, 1102, 985, 1102, 870, 1102, 1102, 1102, 1102, 1102, 680, 1102, 1102, 1102, 746, 1102, 1102, 518, 1103, 1102, 520, 1102, 1102, 357, 1102]}, "baseline": {"name": "chosen", "wins": 456, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_ppo_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 39435, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_ppo_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_ppo_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 05:41:18.869840", "total": 512, "seed": 0, "exp_name": "archangel_ppo_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 46, "lengths": [1153, 1153, 319, 528, 1153, 1153, 1153, 1071, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 244, 1153, 1153, 1152, 1153, 1153, 1153, 1153, 1153, 1153, 354, 1153, 1153, 1153, 1153, 1153, 830, 830, 830, 830, 489, 830, 829, 830, 830, 830, 830, 830, 830, 830, 830, 830, 823, 830, 830, 830, 830, 508, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 1017, 1522, 1522, 1521, 1522, 1522, 1522, 1522, 533, 720, 1522, 1522, 1522, 54, 1522, 1236, 1522, 1522, 1521, 1522, 1522, 1522, 1522, 1522, 364, 1522, 1522, 1522, 1522, 1522, 1522, 183, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1231, 1232, 1232, 1232, 1232, 1232, 1232, 1223, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 599, 1232, 1232, 1232, 1024, 1024, 395, 1024, 1024, 1024, 1024, 339, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 652, 655, 1024, 1024, 1024, 1024, 281, 1024, 575, 1024, 1023, 1024, 1021, 1024, 1024, 1024, 1024, 830, 445, 830, 830, 324, 830, 830, 826, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 505, 116, 830, 830, 830, 829, 109, 830, 153, 801, 720, 830, 830, 1621, 1621, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1619, 1120, 1622, 1622, 1622, 273, 59, 1622, 1622, 1622, 1560, 1622, 1622, 1622, 1622, 324, 970, 261, 1622, 1622, 1622, 1622, 877, 1125, 1125, 1125, 1125, 344, 1125, 933, 1125, 1125, 1125, 1124, 595, 325, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 401, 1125, 1125, 1125, 1125, 1125, 550, 1125, 742, 1125, 579, 1125, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 58, 463, 463, 451, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 355, 1340, 1340, 1340, 23, 1340, 1340, 30, 1340, 1338, 769, 1340, 201, 1340, 653, 1340, 1146, 1340, 156, 1340, 1340, 1340, 1340, 734, 5, 1340, 907, 1340, 1340, 1340, 137, 1340, 1340, 669, 1084, 733, 1434, 1434, 1434, 1434, 1356, 1434, 1434, 1434, 1054, 689, 1430, 1434, 1434, 589, 448, 1434, 1434, 1434, 1237, 1434, 1434, 1434, 1006, 1434, 724, 1434, 1434, 1082, 1434, 855, 855, 854, 855, 357, 855, 855, 855, 480, 855, 855, 720, 855, 855, 855, 715, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, 1031, 307, 1031, 639, 532, 1031, 1027, 1031, 1031, 1031, 1031, 782, 1033, 1031, 1031, 1031, 797, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 245, 1031, 1289, 1289, 1289, 82, 1289, 214, 393, 1289, 1289, 1289, 1289, 1289, 610, 1289, 606, 1289, 1289, 389, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 433, 1288, 1289, 132, 1289, 1289, 1289, 1638, 1172, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 343, 1638, 1638, 1638, 1317, 1637, 1638, 1638, 1638, 596, 1638, 1637, 1638, 1638, 1638, 1637, 1638, 96, 1638, 1638, 1102, 867, 1102, 1102, 1102, 1094, 1102, 1102, 1102, 1102, 1102, 1102, 188, 1101, 1102, 265, 1100, 1102, 1102, 1102, 358, 1102, 1102, 789, 1102, 401, 1102, 1102, 1102, 1102, 1102, 1102]}, "baseline": {"name": "chosen", "wins": 450, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_ppo_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 48663, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_ppo_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_ppo_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 05:57:05.623370", "total": 512, "seed": 0, "exp_name": "archangel_ppo_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 42, "lengths": [1153, 1014, 1153, 1153, 1153, 539, 1153, 1153, 740, 1153, 1152, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1154, 1152, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 133, 766, 830, 287, 830, 830, 830, 830, 830, 830, 480, 292, 39, 790, 830, 830, 830, 830, 830, 303, 830, 830, 830, 830, 830, 830, 830, 830, 831, 830, 829, 830, 553, 189, 1522, 1522, 1044, 1521, 1522, 1522, 1522, 1522, 1008, 1522, 628, 1522, 1522, 1522, 1522, 1521, 265, 755, 1522, 687, 1522, 1522, 1522, 1524, 1521, 1522, 1522, 30, 1522, 1175, 1232, 1232, 52, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 27, 459, 1233, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1232, 1231, 1232, 894, 1232, 1232, 1232, 1232, 1024, 1024, 770, 681, 1024, 1024, 1024, 1022, 1023, 1024, 649, 1024, 1024, 415, 1024, 1002, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 872, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 392, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 390, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 1622, 1622, 1622, 513, 1622, 1622, 543, 1622, 1622, 1622, 1622, 1622, 1136, 1622, 859, 1622, 845, 461, 1622, 1622, 1623, 239, 1607, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 1622, 465, 1125, 388, 1125, 1125, 138, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 1125, 364, 1125, 1125, 1125, 1125, 1123, 831, 1125, 1125, 311, 1125, 1125, 1125, 513, 138, 1125, 69, 1125, 4, 98, 462, 465, 463, 463, 463, 463, 463, 463, 286, 463, 317, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 215, 463, 463, 463, 463, 463, 257, 463, 463, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1012, 1341, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 878, 1340, 1340, 1340, 1340, 220, 284, 1212, 483, 1340, 1339, 243, 1340, 1340, 1340, 1434, 1434, 1434, 1434, 1434, 587, 1434, 1434, 1434, 1434, 373, 379, 1152, 1434, 985, 1434, 1434, 1434, 1434, 314, 693, 1434, 1019, 1434, 1434, 959, 1434, 746, 1434, 1434, 1434, 1434, 855, 855, 855, 237, 408, 757, 855, 855, 855, 855, 855, 364, 855, 183, 855, 334, 855, 855, 855, 855, 855, 855, 586, 855, 855, 855, 855, 855, 855, 385, 855, 855, 1031, 1031, 1031, 1031, 1031, 1031, 1028, 1031, 1031, 278, 1031, 398, 1031, 996, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 893, 0, 1031, 1031, 1031, 718, 1031, 1031, 1031, 1031, 272, 366, 1289, 195, 1289, 155, 26, 1289, 257, 1289, 1289, 1289, 526, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1289, 1288, 1289, 1289, 685, 1289, 1289, 1289, 935, 1289, 1638, 1638, 1638, 1638, 715, 1638, 615, 1638, 357, 1638, 710, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1638, 1639, 1638, 555, 874, 860, 1638, 764, 1638, 1638, 1638, 1102, 377, 116, 1102, 1102, 1102, 633, 1102, 1102, 1102, 1102, 722, 1102, 1102, 475, 1102, 1102, 1102, 1102, 1102, 1102, 1102, 1101, 127, 1102, 1102, 1102, 31, 1094, 1102, 603, 1102]}, "baseline": {"name": "chosen", "wins": 455, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_ppo_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 43347, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_ppo_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_ppo_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 06:13:08.117443", "total": 512, "seed": 0, "exp_name": "archangel_ppo_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 182, "lengths": [1086, 419, 101, 1085, 1086, 292, 1086, 42, 470, 674, 1086, 847, 1086, 1086, 1086, 899, 304, 1086, 1086, 329, 457, 753, 1086, 1086, 1086, 1086, 1086, 658, 627, 77, 904, 912, 742, 79, 742, 222, 449, 742, 488, 151, 742, 742, 605, 619, 742, 594, 609, 741, 742, 741, 742, 438, 741, 742, 265, 742, 742, 586, 22, 742, 741, 742, 398, 742, 1464, 222, 1464, 1464, 1145, 965, 1464, 1463, 82, 577, 1464, 297, 1463, 1464, 449, 42, 899, 1464, 1464, 269, 9, 1464, 1463, 547, 1463, 1464, 621, 472, 1464, 14, 1462, 1464, 374, 415, 204, 37, 707, 117, 567, 997, 240, 978, 821, 885, 997, 792, 43, 308, 997, 997, 247, 997, 372, 997, 997, 997, 997, 837, 998, 997, 997, 293, 689, 997, 1025, 1025, 262, 1025, 1026, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 214, 162, 658, 1026, 1025, 1023, 1025, 942, 1025, 497, 1025, 1025, 461, 1025, 216, 1025, 1024, 1025, 320, 814, 813, 814, 813, 813, 813, 608, 814, 814, 172, 104, 814, 814, 559, 559, 550, 814, 385, 149, 814, 181, 814, 814, 813, 813, 109, 37, 22, 814, 813, 813, 83, 290, 1018, 466, 279, 1590, 54, 1590, 1380, 145, 547, 531, 1590, 302, 1515, 1590, 200, 115, 501, 156, 319, 526, 195, 1590, 377, 744, 1171, 1590, 1590, 1322, 1590, 233, 1590, 150, 474, 1026, 1026, 426, 1026, 373, 1026, 1026, 1026, 1026, 1026, 1026, 832, 521, 1025, 1026, 221, 1026, 1025, 465, 129, 216, 574, 1026, 685, 1025, 81, 1026, 1026, 13, 74, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 192, 101, 239, 240, 240, 240, 240, 240, 163, 240, 239, 240, 240, 240, 240, 239, 240, 240, 240, 218, 22, 513, 1283, 1283, 14, 1282, 1283, 1283, 1283, 1283, 1283, 1267, 1283, 1282, 1283, 562, 758, 1282, 1283, 454, 204, 1283, 1283, 1283, 17, 1283, 1283, 1283, 1283, 1283, 877, 1234, 1283, 1374, 379, 442, 1374, 1375, 996, 1375, 1375, 1375, 1375, 1375, 1374, 1375, 570, 1375, 1370, 1375, 188, 1375, 1375, 461, 1375, 38, 1375, 1375, 340, 404, 290, 443, 583, 1375, 1375, 683, 683, 508, 671, 429, 683, 683, 240, 683, 683, 683, 682, 557, 99, 297, 436, 683, 683, 683, 69, 683, 680, 356, 683, 683, 404, 556, 226, 204, 434, 450, 614, 915, 915, 378, 414, 915, 915, 915, 915, 906, 563, 915, 915, 915, 914, 915, 915, 206, 711, 915, 915, 915, 915, 915, 914, 915, 293, 915, 808, 915, 914, 915, 519, 821, 1220, 251, 724, 678, 4, 368, 1219, 1219, 1220, 1220, 1220, 1220, 1220, 240, 1220, 360, 601, 332, 500, 1220, 1219, 607, 1001, 1220, 491, 1097, 958, 1073, 1220, 144, 1220, 234, 651, 1593, 1593, 243, 1593, 87, 277, 1593, 1592, 1593, 1593, 865, 193, 1592, 1448, 471, 1593, 1593, 1593, 1593, 1521, 1593, 3, 1593, 1593, 1593, 853, 137, 725, 41, 1593, 753, 962, 962, 962, 962, 360, 962, 963, 962, 961, 962, 962, 962, 962, 962, 38, 962, 962, 961, 962, 962, 505, 962, 962, 962, 962, 595, 962, 962, 103, 962, 808]}, "baseline": {"name": "chosen", "wins": 307, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_ppo_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 47743, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_ppo_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_ppo_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 06:30:00.652105", "total": 512, "seed": 0, "exp_name": "archangel_ppo_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 198, "lengths": [1085, 1086, 1089, 1086, 1086, 249, 1086, 54, 1086, 1086, 1086, 1085, 1086, 1086, 518, 1086, 1086, 1086, 1086, 1086, 1086, 1085, 1086, 1086, 1086, 1086, 1086, 1085, 1086, 1086, 1086, 1085, 742, 742, 742, 742, 742, 742, 742, 742, 369, 742, 742, 742, 741, 742, 742, 741, 742, 741, 351, 742, 741, 742, 496, 742, 742, 741, 742, 741, 376, 742, 742, 742, 1370, 1464, 1464, 1458, 1464, 1464, 1464, 533, 459, 1095, 1464, 1464, 1464, 1464, 1464, 1464, 1463, 1464, 1464, 1464, 1464, 1464, 1464, 1465, 1463, 1464, 1464, 1464, 1464, 1464, 1463, 1462, 997, 997, 997, 606, 997, 997, 997, 997, 997, 997, 997, 655, 997, 997, 996, 996, 997, 997, 997, 997, 997, 997, 997, 997, 997, 997, 998, 997, 997, 997, 996, 345, 1024, 1025, 258, 1024, 1026, 248, 617, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1026, 1025, 1023, 507, 476, 492, 1025, 1025, 1025, 1024, 1025, 1025, 1025, 1024, 1025, 1025, 814, 813, 814, 814, 813, 813, 814, 814, 814, 321, 89, 814, 814, 814, 814, 814, 88, 814, 814, 814, 813, 814, 814, 167, 813, 814, 814, 357, 814, 813, 814, 814, 1588, 1590, 1590, 1590, 1590, 1590, 1590, 1590, 1590, 1590, 1590, 673, 1093, 1590, 1590, 1590, 1589, 1590, 775, 1590, 1590, 1590, 1590, 1342, 1590, 1590, 1590, 1590, 1590, 1590, 1450, 1590, 1026, 1026, 183, 1026, 1026, 1026, 669, 1026, 654, 1026, 1026, 1026, 1026, 1026, 1026, 459, 1026, 61, 1026, 1024, 926, 1026, 891, 1025, 1026, 570, 1026, 723, 307, 1026, 1026, 1026, 240, 240, 239, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 239, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 239, 240, 238, 226, 240, 240, 1283, 538, 1283, 448, 421, 1283, 1283, 1283, 1283, 1283, 336, 1283, 1283, 1035, 1283, 1283, 1282, 415, 1283, 1282, 1283, 1283, 1283, 26, 1283, 1283, 1283, 1283, 1282, 1202, 1283, 1283, 1374, 1375, 359, 1375, 1375, 1375, 1375, 1375, 1276, 568, 51, 423, 1375, 865, 1375, 1223, 1375, 836, 1375, 1260, 1375, 1375, 1375, 1375, 1375, 1375, 340, 1375, 670, 1375, 1375, 1375, 683, 683, 683, 683, 440, 683, 683, 683, 683, 683, 683, 342, 330, 683, 683, 683, 683, 682, 683, 683, 683, 683, 643, 159, 683, 683, 683, 683, 682, 682, 682, 683, 915, 915, 915, 915, 915, 915, 915, 915, 915, 212, 915, 915, 915, 800, 915, 915, 915, 694, 914, 808, 915, 915, 915, 915, 915, 915, 915, 914, 915, 914, 915, 913, 1220, 1220, 1220, 1220, 1220, 1220, 1219, 1219, 1219, 1220, 1220, 1220, 1220, 1220, 19, 1220, 1220, 1220, 1220, 1220, 494, 447, 1114, 1220, 1220, 1220, 1219, 1220, 1219, 1220, 1220, 1220, 1593, 1593, 1593, 1593, 1442, 1593, 422, 1593, 604, 409, 843, 1593, 1593, 1593, 488, 1593, 520, 1593, 1593, 1593, 963, 1592, 1593, 1592, 1593, 1593, 481, 1593, 661, 1593, 1593, 1593, 734, 460, 962, 962, 962, 962, 961, 962, 962, 961, 962, 742, 962, 962, 962, 961, 962, 962, 961, 962, 652, 858, 962, 962, 278, 962, 247, 962, 962, 962, 15, 582]}, "baseline": {"name": "chosen", "wins": 291, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_ppo_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 46817, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_ppo_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_ppo_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 06:46:23.448931", "total": 512, "seed": 0, "exp_name": "archangel_ppo_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 261, "lengths": [386, 184, 1460, 1460, 880, 1460, 1460, 503, 893, 664, 521, 1459, 1460, 55, 498, 536, 45, 1086, 1086, 487, 1086, 382, 1086, 534, 333, 1086, 1086, 724, 1086, 717, 1080, 390, 1404, 1403, 465, 145, 935, 522, 422, 1095, 503, 1023, 46, 1404, 1002, 29, 1404, 240, 742, 194, 742, 742, 742, 742, 742, 742, 742, 742, 11, 742, 742, 381, 742, 742, 1511, 262, 1511, 1511, 1511, 1072, 1511, 898, 1511, 1512, 1504, 1511, 430, 1330, 1321, 560, 673, 1464, 1464, 1464, 1464, 250, 1464, 333, 30, 656, 286, 1463, 1464, 1464, 1464, 1464, 533, 318, 997, 997, 997, 131, 621, 997, 996, 613, 997, 997, 284, 211, 14, 905, 682, 1485, 109, 787, 731, 281, 1485, 1485, 271, 228, 682, 1485, 999, 1484, 1485, 1485, 501, 1574, 1574, 1574, 1572, 1574, 1574, 1574, 1545, 1574, 1574, 1574, 1574, 1573, 1574, 184, 1026, 297, 1025, 1025, 1023, 1024, 1025, 1025, 1025, 901, 1024, 1025, 1025, 1025, 712, 91, 810, 813, 814, 388, 814, 814, 361, 814, 227, 333, 534, 814, 814, 814, 814, 814, 1455, 259, 1455, 976, 1455, 1455, 1455, 1455, 380, 729, 4, 1455, 251, 1455, 488, 1455, 1588, 1590, 1590, 1589, 1287, 654, 928, 554, 120, 1590, 1590, 1589, 1590, 16, 315, 1590, 1057, 1711, 35, 254, 409, 456, 851, 107, 354, 679, 459, 1711, 1323, 1711, 127, 1711, 1026, 1026, 404, 169, 1026, 534, 1026, 400, 696, 1026, 1026, 249, 138, 1026, 1026, 352, 1289, 1289, 1289, 1289, 56, 260, 1289, 1289, 866, 991, 1289, 238, 769, 1289, 861, 1289, 6, 1358, 293, 1358, 413, 1196, 555, 1358, 1357, 1358, 1358, 322, 1358, 77, 784, 1358, 239, 138, 240, 240, 240, 240, 110, 240, 240, 239, 240, 240, 240, 240, 240, 240, 1524, 1525, 768, 53, 921, 241, 1032, 238, 1524, 318, 1525, 200, 795, 1525, 1525, 1525, 1283, 750, 138, 514, 1283, 764, 370, 355, 29, 918, 1283, 1283, 325, 404, 358, 332, 1375, 1374, 446, 1375, 1121, 1375, 1125, 782, 158, 1022, 111, 1375, 1119, 100, 372, 1375, 852, 393, 1602, 1602, 111, 872, 1602, 1602, 1602, 1602, 1602, 871, 1602, 1602, 363, 1065, 683, 682, 625, 291, 683, 683, 683, 683, 197, 610, 98, 682, 167, 7, 682, 682, 1443, 1444, 1444, 1444, 769, 61, 299, 1361, 1444, 278, 754, 294, 1026, 1444, 1444, 986, 72, 459, 380, 1316, 724, 572, 1316, 244, 185, 340, 940, 196, 868, 193, 373, 1315, 915, 914, 364, 469, 915, 915, 94, 546, 645, 915, 915, 914, 915, 915, 915, 167, 1220, 1220, 383, 605, 1220, 34, 1219, 1107, 1220, 1220, 1220, 23, 1220, 1220, 23, 302, 1363, 1087, 238, 1357, 1363, 483, 1041, 1287, 1363, 1363, 993, 193, 1363, 653, 17, 1363, 625, 1605, 1233, 180, 1606, 289, 839, 1606, 1605, 1606, 1606, 728, 1606, 1604, 1606, 631, 772, 1592, 1593, 1593, 1592, 25, 1285, 1593, 891, 481, 28, 124, 1593, 707, 329, 1593, 1528, 1528, 260, 1528, 72, 721, 316, 1351, 1528, 1528, 1528, 353, 1528, 1528, 639, 675, 962, 962, 962, 355, 962, 962, 962, 172, 962, 962, 962, 962, 963, 962, 962, 850]}, "baseline": {"name": "chosen", "wins": 232, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_ppo_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 50885, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_ppo_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_ppo_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 07:02:28.789814", "total": 512, "seed": 0, "exp_name": "archangel_slic_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 35, "lengths": [1152, 373, 1140, 805, 1153, 1153, 679, 1153, 1153, 1153, 801, 793, 99, 572, 840, 677, 1153, 386, 219, 423, 628, 336, 1153, 621, 276, 1153, 227, 1153, 495, 1153, 732, 347, 830, 257, 830, 701, 830, 325, 127, 830, 406, 830, 350, 262, 830, 830, 830, 256, 830, 830, 830, 830, 830, 828, 166, 786, 830, 830, 205, 830, 341, 830, 830, 265, 945, 59, 867, 1521, 457, 1522, 923, 1234, 1522, 453, 1522, 1522, 120, 426, 184, 834, 359, 1522, 1522, 226, 1521, 1522, 1522, 1079, 1522, 251, 1522, 1522, 1524, 227, 49, 433, 1229, 576, 40, 1232, 1232, 1232, 745, 1232, 1232, 1232, 1163, 405, 1232, 976, 21, 66, 356, 358, 1232, 929, 1232, 637, 683, 387, 647, 1232, 1232, 1232, 1232, 724, 1232, 768, 1024, 708, 189, 347, 1024, 1023, 168, 907, 1024, 242, 1024, 1024, 118, 254, 1024, 660, 1024, 1024, 327, 1024, 1024, 603, 1024, 1024, 790, 614, 1024, 988, 1024, 1023, 1024, 978, 300, 627, 830, 830, 830, 721, 830, 66, 304, 125, 830, 432, 401, 830, 390, 830, 830, 830, 297, 830, 406, 709, 831, 830, 259, 221, 353, 830, 830, 830, 830, 747, 1619, 1622, 344, 1529, 1622, 1375, 581, 313, 1622, 1622, 1622, 1622, 882, 44, 351, 1622, 77, 412, 822, 1620, 1009, 170, 373, 1622, 410, 202, 1622, 238, 1120, 409, 1622, 327, 1125, 396, 1125, 1125, 816, 145, 791, 584, 660, 506, 335, 1125, 1125, 978, 369, 755, 1125, 458, 1125, 293, 746, 940, 1125, 1125, 1125, 459, 220, 92, 1125, 292, 464, 809, 463, 378, 363, 463, 128, 463, 173, 464, 463, 463, 77, 463, 463, 388, 462, 463, 463, 463, 463, 463, 463, 449, 463, 463, 463, 129, 463, 463, 463, 463, 392, 463, 950, 1340, 523, 344, 1340, 451, 399, 1341, 1340, 1340, 1340, 1340, 1340, 791, 1340, 1338, 851, 141, 528, 1340, 718, 1340, 1340, 1340, 433, 983, 1340, 572, 1089, 607, 1340, 207, 300, 1013, 183, 1434, 767, 634, 255, 1434, 1434, 116, 1231, 551, 371, 1433, 1434, 1434, 1434, 1434, 681, 644, 58, 634, 22, 1434, 1434, 1434, 667, 381, 393, 1434, 1434, 83, 855, 855, 474, 855, 720, 298, 119, 444, 155, 530, 855, 855, 854, 76, 855, 638, 301, 426, 855, 855, 855, 236, 855, 855, 729, 855, 855, 854, 779, 67, 210, 256, 1031, 1031, 1031, 668, 1031, 485, 762, 418, 1031, 663, 1031, 271, 1031, 531, 1031, 914, 54, 1031, 713, 485, 737, 1031, 219, 926, 1031, 166, 152, 115, 659, 542, 476, 526, 1289, 521, 565, 48, 576, 20, 841, 582, 511, 1288, 757, 3, 1289, 538, 94, 1289, 1289, 1289, 1289, 372, 882, 1289, 173, 1289, 1289, 1063, 602, 1289, 1288, 446, 1188, 1289, 1638, 201, 1170, 1638, 1430, 96, 446, 655, 612, 1638, 325, 1637, 1638, 219, 260, 1638, 1531, 347, 1638, 475, 1638, 135, 519, 1265, 293, 1638, 355, 569, 1638, 1588, 209, 1638, 473, 767, 191, 1102, 966, 1018, 147, 679, 231, 1102, 266, 1102, 1102, 268, 328, 158, 672, 322, 936, 197, 1101, 998, 681, 561, 377, 1102, 1101, 1102, 565, 311, 1102, 249]}, "baseline": {"name": "chosen", "wins": 463, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_slic_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 45197, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_slic_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_slic_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 07:18:52.817736", "total": 512, "seed": 0, "exp_name": "archangel_slic_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 71, "lengths": [1153, 549, 866, 320, 552, 1153, 1153, 51, 1153, 556, 1153, 551, 75, 1151, 921, 1153, 615, 1153, 102, 325, 464, 192, 1153, 1153, 765, 1153, 5, 1153, 128, 1153, 465, 44, 830, 128, 184, 830, 830, 830, 830, 830, 830, 830, 830, 830, 127, 481, 150, 830, 830, 830, 830, 342, 739, 830, 88, 238, 830, 381, 830, 830, 512, 830, 624, 829, 341, 556, 1509, 1316, 1522, 516, 730, 1049, 433, 1216, 129, 788, 1331, 392, 89, 834, 1522, 1522, 914, 1522, 79, 545, 238, 1170, 503, 1522, 140, 213, 1522, 832, 577, 1522, 1232, 761, 706, 587, 1232, 1232, 476, 1232, 1232, 1229, 1232, 625, 101, 1232, 42, 458, 921, 1232, 258, 715, 1171, 491, 1232, 653, 262, 1232, 244, 1232, 265, 1232, 1232, 265, 1024, 1024, 206, 1024, 728, 1023, 504, 893, 1024, 1024, 499, 1024, 1024, 408, 515, 371, 1024, 443, 1024, 1024, 1023, 459, 1024, 1024, 1024, 331, 551, 1024, 183, 1024, 1024, 1024, 830, 303, 829, 830, 830, 830, 829, 802, 830, 830, 830, 830, 282, 830, 830, 830, 830, 830, 830, 412, 830, 830, 466, 830, 830, 70, 7, 395, 679, 830, 830, 22, 405, 374, 565, 14, 668, 1622, 945, 963, 522, 1450, 660, 544, 1622, 80, 200, 880, 301, 308, 652, 725, 386, 1622, 465, 461, 1510, 243, 147, 1063, 1271, 1127, 557, 1620, 1125, 1125, 1125, 1125, 1125, 504, 1125, 1125, 578, 1125, 153, 629, 94, 275, 848, 891, 1125, 725, 1125, 96, 1121, 225, 1058, 521, 791, 1125, 283, 654, 581, 1125, 6, 1125, 463, 463, 463, 463, 162, 463, 399, 462, 463, 463, 463, 463, 71, 463, 161, 463, 463, 338, 463, 463, 463, 463, 463, 463, 347, 463, 463, 463, 463, 463, 235, 78, 1340, 152, 1190, 96, 684, 1340, 673, 1340, 1340, 1175, 1340, 1340, 573, 131, 294, 950, 961, 141, 372, 1340, 1340, 986, 513, 1340, 1340, 1340, 386, 416, 1340, 169, 632, 1340, 1067, 164, 67, 1434, 957, 649, 1033, 117, 1434, 1434, 293, 1434, 661, 999, 1434, 961, 345, 952, 174, 1238, 219, 1075, 622, 1434, 607, 453, 323, 707, 1434, 707, 1434, 1434, 855, 482, 855, 855, 224, 855, 111, 855, 855, 855, 855, 855, 596, 820, 800, 156, 855, 855, 855, 855, 855, 502, 855, 855, 456, 855, 136, 201, 157, 215, 855, 56, 1031, 179, 295, 1031, 1031, 1031, 1031, 362, 329, 657, 1031, 516, 1031, 219, 1031, 1031, 171, 468, 158, 1031, 730, 513, 576, 396, 1031, 165, 399, 788, 1031, 1031, 1031, 1031, 794, 346, 85, 557, 41, 1289, 1289, 1285, 1289, 1289, 1289, 3, 828, 717, 98, 1289, 656, 549, 119, 951, 1289, 1289, 1288, 507, 1289, 143, 1289, 589, 991, 125, 1289, 244, 187, 1638, 1638, 668, 797, 1638, 1561, 798, 178, 828, 513, 851, 574, 544, 614, 1019, 1638, 1638, 472, 502, 1638, 1638, 748, 1638, 199, 411, 447, 288, 518, 1638, 1638, 1638, 898, 1102, 1102, 941, 1102, 380, 147, 717, 1102, 153, 1102, 1102, 228, 226, 327, 193, 1006, 428, 651, 486, 1102, 1102, 1102, 56, 1102, 237, 1102, 1102, 1102, 732, 2, 290]}, "baseline": {"name": "chosen", "wins": 428, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_slic_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 38955, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_slic_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_slic_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 07:36:01.517525", "total": 512, "seed": 0, "exp_name": "archangel_slic_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 71, "lengths": [1153, 1153, 391, 270, 1153, 1153, 1153, 1153, 1153, 1153, 276, 171, 1153, 716, 1049, 920, 739, 1153, 1153, 1153, 1153, 103, 1153, 1153, 1153, 594, 15, 310, 1153, 347, 211, 1153, 830, 391, 830, 830, 830, 830, 830, 830, 830, 830, 830, 254, 24, 830, 830, 394, 829, 771, 781, 559, 830, 830, 316, 830, 829, 819, 6, 830, 830, 830, 830, 830, 1522, 1522, 1522, 128, 1153, 210, 1522, 997, 614, 1061, 1522, 1392, 1522, 1522, 497, 349, 889, 1522, 811, 288, 1452, 1278, 1499, 466, 335, 1522, 1522, 548, 63, 939, 1522, 1522, 890, 1232, 1232, 743, 1232, 1030, 1232, 1232, 1232, 1232, 1232, 1232, 787, 418, 41, 914, 1232, 147, 149, 1232, 1232, 1232, 1232, 1232, 1161, 1009, 220, 1232, 997, 1232, 1232, 1040, 1024, 1024, 1010, 1021, 1024, 1024, 798, 1024, 886, 1024, 1024, 293, 344, 1024, 108, 160, 1024, 1024, 321, 1023, 1023, 1024, 452, 1024, 211, 841, 78, 1024, 1023, 108, 1024, 1024, 308, 728, 830, 819, 830, 290, 830, 830, 830, 748, 830, 599, 830, 830, 830, 699, 829, 830, 830, 830, 830, 830, 830, 790, 830, 60, 63, 830, 830, 417, 830, 832, 1622, 1577, 1248, 1622, 1622, 1308, 1622, 1612, 1319, 846, 1622, 573, 1565, 60, 1621, 666, 163, 1517, 494, 1622, 111, 181, 627, 235, 445, 1622, 572, 395, 1622, 545, 1148, 623, 1125, 478, 65, 931, 1125, 1125, 1125, 728, 1125, 1125, 347, 883, 171, 1125, 1125, 1125, 1124, 21, 880, 1125, 240, 538, 1125, 1124, 1125, 1041, 444, 1125, 623, 1004, 562, 1100, 4, 463, 463, 463, 463, 39, 463, 437, 463, 122, 321, 170, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 463, 201, 463, 463, 20, 373, 297, 1340, 150, 273, 1051, 1340, 1340, 332, 1340, 1339, 1340, 1340, 1340, 1194, 1339, 1340, 546, 1340, 87, 1340, 944, 639, 1340, 1340, 5, 827, 1248, 1340, 1340, 1340, 1340, 1336, 1340, 756, 1434, 431, 1434, 1434, 1434, 1434, 1118, 1434, 1434, 845, 1434, 318, 894, 1434, 1434, 743, 1434, 1434, 474, 1434, 1434, 213, 1434, 1434, 1429, 1434, 460, 1434, 202, 1434, 1434, 310, 855, 855, 855, 855, 829, 243, 714, 749, 855, 854, 855, 855, 6, 855, 855, 542, 855, 855, 855, 855, 855, 855, 855, 792, 855, 855, 855, 855, 855, 855, 147, 373, 1031, 1031, 1031, 1031, 1022, 1031, 671, 1031, 1031, 1031, 1031, 1031, 1031, 1031, 730, 1031, 1031, 1031, 1031, 336, 1031, 1031, 111, 396, 1031, 1031, 311, 134, 1029, 1031, 397, 1289, 426, 1289, 1289, 69, 1273, 1286, 1289, 485, 1289, 1289, 393, 874, 1289, 506, 1192, 1289, 614, 1289, 230, 508, 1289, 484, 491, 1289, 1289, 1289, 1289, 292, 1143, 298, 1247, 1638, 1638, 1638, 1638, 1632, 1146, 1638, 420, 647, 1413, 1606, 759, 1638, 1638, 1638, 151, 1638, 871, 1635, 1636, 1638, 1638, 1553, 105, 1638, 368, 1638, 348, 1638, 1638, 986, 1638, 1102, 1102, 1102, 1102, 554, 338, 1102, 1102, 1102, 1101, 1102, 483, 76, 459, 455, 1003, 1102, 1102, 1071, 1102, 1102, 1102, 629, 1102, 160, 1102, 1102, 1102, 1102, 676, 1101, 48]}, "baseline": {"name": "chosen", "wins": 428, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_slic_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 37731, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_slic_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_slic_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 07:52:56.443303", "total": 512, "seed": 0, "exp_name": "archangel_slic_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 54, "lengths": [478, 628, 274, 1153, 1153, 1153, 834, 1153, 1004, 1153, 224, 1153, 110, 310, 1153, 1104, 1153, 1153, 1153, 1153, 1153, 169, 972, 692, 123, 1153, 1153, 1153, 242, 178, 1153, 1153, 705, 110, 830, 287, 830, 830, 830, 391, 830, 830, 830, 277, 233, 830, 830, 830, 830, 830, 830, 197, 830, 108, 830, 830, 616, 664, 100, 343, 144, 830, 830, 830, 1307, 302, 1522, 1522, 1521, 257, 1522, 1522, 1522, 1522, 206, 955, 268, 1522, 1522, 915, 1499, 348, 607, 900, 1522, 1522, 121, 1522, 379, 1522, 438, 1522, 57, 435, 1522, 1211, 1232, 341, 1232, 1232, 1232, 1031, 1232, 1232, 167, 1232, 296, 42, 210, 1232, 31, 742, 1232, 1232, 1232, 1232, 1232, 1231, 275, 1232, 219, 1231, 1232, 1232, 1232, 360, 214, 1230, 1024, 1024, 1024, 1024, 923, 1024, 877, 1024, 1024, 224, 479, 752, 1024, 27, 173, 1024, 1024, 1024, 1024, 1024, 1024, 566, 1024, 1024, 560, 334, 205, 122, 1024, 1024, 1024, 1024, 830, 830, 830, 125, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 830, 344, 830, 830, 830, 297, 830, 830, 97, 830, 830, 119, 145, 29, 625, 830, 830, 830, 1622, 1622, 963, 225, 1622, 1622, 415, 1622, 304, 1622, 1622, 1622, 1622, 573, 1622, 253, 303, 1622, 1622, 91, 618, 1621, 1622, 589, 289, 1622, 1621, 1622, 1622, 1621, 281, 712, 1125, 1125, 1125, 169, 1125, 555, 370, 361, 1125, 516, 1125, 481, 1125, 1125, 1125, 303, 511, 1125, 1125, 1125, 822, 353, 1125, 1125, 1125, 1125, 1125, 75, 1125, 1063, 19, 1125, 4, 463, 463, 280, 462, 463, 54, 463, 463, 463, 302, 410, 463, 463, 463, 463, 462, 463, 290, 463, 463, 96, 463, 463, 194, 463, 463, 463, 274, 148, 134, 463, 57, 1340, 1339, 81, 153, 1340, 399, 1340, 771, 252, 1213, 1340, 779, 581, 1340, 1340, 1340, 400, 107, 408, 1340, 47, 1340, 22, 1341, 1340, 1339, 1172, 873, 144, 135, 1340, 1434, 1434, 1434, 907, 1434, 1434, 1434, 1434, 1434, 1434, 1434, 379, 114, 758, 258, 465, 1185, 560, 1434, 1434, 1434, 1434, 540, 1434, 1434, 1434, 83, 1434, 1434, 1434, 154, 1434, 455, 855, 855, 172, 853, 855, 855, 855, 855, 305, 855, 422, 855, 855, 732, 855, 855, 228, 855, 855, 855, 855, 855, 855, 855, 855, 647, 855, 855, 270, 546, 18, 539, 618, 1031, 1031, 1031, 210, 1031, 1031, 1031, 466, 153, 752, 1031, 1031, 1031, 83, 557, 262, 949, 1031, 1031, 1031, 0, 882, 1031, 1031, 1031, 1031, 372, 827, 548, 635, 1289, 1289, 320, 1289, 1289, 31, 1289, 247, 1289, 1289, 1289, 1289, 444, 1288, 1289, 1076, 1289, 1289, 1126, 99, 1289, 969, 1289, 101, 1289, 69, 324, 1289, 1289, 1289, 1289, 1286, 1338, 450, 1178, 1638, 1624, 1638, 1638, 1638, 1470, 1638, 242, 1639, 1638, 59, 1616, 248, 1627, 1638, 1638, 1638, 1638, 1638, 426, 49, 1638, 1638, 1638, 956, 818, 1638, 1638, 1638, 1101, 1102, 589, 1102, 191, 67, 1102, 1102, 1102, 1102, 1102, 1094, 209, 1102, 1102, 1102, 1102, 1101, 1102, 700, 1102, 1102, 1101, 506, 327, 1102, 1102, 268, 1102, 285, 4, 1102]}, "baseline": {"name": "chosen", "wins": 440, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_slic_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 52903, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_slic_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_slic_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 08:08:24.956757", "total": 512, "seed": 0, "exp_name": "archangel_slic_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 171, "lengths": [380, 910, 77, 144, 278, 141, 140, 112, 322, 222, 544, 241, 60, 231, 1024, 268, 144, 183, 158, 118, 70, 126, 252, 57, 65, 184, 109, 1085, 101, 93, 174, 253, 742, 287, 123, 128, 27, 265, 295, 202, 179, 378, 439, 248, 474, 85, 325, 742, 742, 186, 89, 183, 113, 125, 115, 74, 742, 219, 92, 130, 309, 270, 135, 106, 178, 115, 176, 73, 567, 125, 100, 253, 116, 323, 337, 75, 171, 123, 195, 163, 102, 111, 94, 87, 31, 155, 144, 423, 332, 48, 165, 65, 105, 55, 162, 364, 97, 92, 705, 152, 34, 997, 356, 996, 85, 160, 150, 169, 723, 262, 54, 85, 300, 119, 107, 253, 85, 109, 91, 122, 86, 194, 967, 115, 383, 113, 394, 172, 1025, 1023, 161, 271, 317, 206, 390, 152, 102, 172, 797, 250, 259, 86, 154, 115, 1026, 224, 161, 132, 320, 94, 75, 1025, 555, 258, 90, 31, 367, 206, 172, 162, 284, 551, 814, 156, 814, 467, 342, 303, 80, 249, 124, 93, 454, 278, 183, 369, 114, 156, 310, 622, 181, 60, 95, 117, 342, 21, 107, 246, 259, 129, 283, 74, 57, 116, 157, 183, 321, 134, 532, 113, 159, 196, 156, 95, 236, 33, 91, 239, 151, 350, 55, 103, 147, 120, 321, 149, 458, 67, 203, 184, 636, 275, 257, 336, 63, 67, 124, 162, 76, 281, 1026, 324, 89, 20, 337, 113, 120, 347, 178, 279, 581, 202, 553, 106, 225, 116, 735, 75, 170, 212, 162, 71, 305, 225, 20, 205, 6, 154, 202, 240, 52, 240, 240, 208, 240, 96, 193, 239, 29, 89, 76, 240, 123, 136, 240, 100, 86, 189, 61, 240, 240, 137, 239, 209, 75, 241, 240, 110, 151, 392, 82, 113, 158, 131, 135, 125, 161, 297, 322, 152, 483, 803, 178, 442, 157, 27, 168, 126, 172, 224, 60, 35, 463, 75, 85, 256, 67, 280, 134, 116, 57, 675, 43, 428, 225, 91, 939, 575, 62, 180, 83, 507, 118, 107, 166, 110, 203, 110, 170, 126, 130, 31, 150, 86, 106, 153, 173, 138, 231, 94, 207, 55, 195, 84, 679, 52, 156, 16, 111, 87, 99, 63, 154, 558, 96, 130, 229, 91, 247, 182, 371, 117, 264, 115, 268, 248, 538, 90, 535, 156, 264, 681, 149, 85, 88, 100, 283, 163, 584, 72, 377, 308, 369, 324, 250, 202, 114, 510, 75, 84, 89, 75, 154, 152, 117, 600, 492, 143, 713, 129, 161, 58, 25, 102, 173, 156, 255, 275, 140, 81, 59, 99, 193, 115, 444, 240, 502, 1220, 221, 207, 69, 87, 113, 98, 89, 114, 251, 227, 336, 109, 966, 1220, 183, 54, 108, 74, 28, 217, 331, 350, 177, 95, 286, 75, 91, 127, 100, 216, 623, 83, 360, 425, 207, 192, 469, 177, 170, 105, 103, 67, 85, 248, 48, 92, 144, 161, 898, 138, 189, 1192, 491, 60, 960, 154, 123, 152, 126, 963, 207, 144, 262, 517, 139, 215, 169, 181, 144, 221, 55, 817, 147, 57, 90, 454, 425, 500, 238, 327, 86, 51, 40, 97]}, "baseline": {"name": "chosen", "wins": 317, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_slic_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 36409, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_slic_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_slic_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 08:23:51.688078", "total": 512, "seed": 0, "exp_name": "archangel_slic_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 194, "lengths": [161, 219, 56, 62, 123, 147, 97, 74, 397, 339, 179, 278, 121, 128, 204, 183, 265, 104, 147, 139, 331, 166, 289, 127, 399, 141, 126, 700, 102, 198, 305, 290, 512, 375, 170, 127, 227, 146, 147, 226, 258, 145, 346, 89, 354, 121, 129, 180, 406, 97, 92, 148, 283, 95, 200, 104, 614, 435, 90, 104, 152, 91, 174, 113, 223, 38, 102, 143, 560, 196, 63, 437, 293, 356, 226, 94, 207, 252, 135, 48, 96, 693, 99, 205, 92, 230, 268, 204, 394, 259, 212, 161, 268, 42, 156, 175, 130, 131, 78, 112, 90, 39, 113, 101, 91, 766, 281, 997, 110, 90, 35, 112, 460, 322, 69, 223, 87, 110, 58, 393, 101, 285, 438, 160, 418, 163, 997, 66, 163, 403, 91, 553, 1025, 658, 460, 109, 115, 103, 469, 198, 515, 93, 219, 389, 1026, 244, 186, 173, 127, 280, 143, 512, 488, 725, 105, 24, 108, 94, 576, 227, 462, 417, 475, 68, 139, 515, 102, 270, 142, 549, 237, 91, 255, 531, 158, 288, 221, 65, 560, 195, 326, 75, 46, 253, 603, 37, 49, 76, 171, 58, 135, 241, 117, 222, 160, 112, 238, 154, 192, 146, 181, 129, 191, 161, 308, 215, 79, 168, 79, 257, 315, 71, 105, 106, 586, 243, 242, 82, 372, 219, 133, 759, 1250, 331, 182, 284, 115, 47, 81, 100, 982, 289, 277, 103, 212, 112, 503, 351, 312, 329, 199, 98, 272, 89, 605, 104, 384, 55, 172, 1026, 436, 192, 736, 244, 74, 184, 6, 102, 187, 240, 236, 80, 161, 103, 240, 96, 240, 102, 104, 120, 161, 124, 171, 123, 239, 239, 190, 240, 57, 158, 106, 240, 241, 118, 127, 241, 240, 96, 164, 384, 502, 38, 89, 127, 1002, 275, 1283, 380, 485, 196, 152, 266, 612, 615, 256, 436, 108, 140, 204, 482, 220, 53, 172, 360, 153, 78, 424, 272, 254, 89, 186, 135, 165, 530, 126, 139, 194, 338, 146, 119, 102, 193, 398, 121, 230, 441, 397, 167, 220, 1375, 112, 107, 251, 758, 116, 577, 128, 1375, 437, 185, 878, 29, 642, 492, 683, 134, 174, 53, 65, 186, 143, 154, 175, 165, 116, 167, 216, 61, 442, 683, 211, 146, 278, 115, 278, 60, 220, 683, 227, 182, 417, 155, 219, 14, 99, 99, 70, 380, 86, 87, 494, 318, 219, 313, 915, 102, 186, 393, 67, 333, 160, 328, 524, 178, 114, 908, 722, 120, 531, 85, 160, 62, 221, 196, 471, 82, 251, 94, 98, 103, 74, 76, 335, 248, 442, 110, 1220, 53, 100, 119, 233, 265, 204, 117, 78, 209, 110, 341, 324, 73, 49, 567, 240, 341, 351, 154, 60, 384, 350, 126, 287, 117, 155, 58, 146, 196, 155, 81, 282, 516, 98, 172, 115, 113, 146, 160, 142, 275, 76, 105, 189, 226, 83, 107, 84, 103, 506, 98, 97, 310, 298, 75, 52, 344, 225, 147, 160, 148, 311, 230, 216, 323, 97, 130, 129, 137, 93, 493, 104, 944, 179, 120, 114, 180, 117, 79, 217, 96, 223, 88, 7, 218]}, "baseline": {"name": "chosen", "wins": 295, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_slic_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 57011, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_slic_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_slic_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 08:39:32.804015", "total": 512, "seed": 0, "exp_name": "archangel_slic_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 197, "lengths": [163, 114, 192, 282, 414, 122, 429, 194, 1460, 146, 187, 334, 66, 240, 918, 276, 417, 75, 271, 112, 213, 275, 191, 124, 145, 165, 313, 248, 133, 127, 275, 321, 364, 400, 175, 119, 89, 142, 238, 189, 187, 295, 157, 357, 519, 135, 565, 236, 435, 159, 141, 199, 141, 741, 264, 227, 577, 742, 74, 145, 137, 628, 320, 516, 189, 97, 136, 338, 128, 100, 243, 413, 119, 337, 104, 91, 264, 259, 204, 181, 102, 130, 1464, 624, 99, 175, 167, 221, 148, 111, 317, 97, 85, 84, 491, 254, 141, 246, 787, 151, 58, 217, 252, 232, 997, 675, 288, 167, 191, 114, 997, 127, 158, 194, 155, 96, 291, 191, 97, 181, 133, 220, 1485, 136, 248, 192, 366, 94, 331, 247, 277, 105, 409, 178, 413, 909, 84, 88, 555, 1468, 265, 131, 151, 415, 1026, 356, 313, 209, 223, 169, 181, 490, 250, 385, 116, 51, 98, 158, 667, 147, 538, 814, 183, 158, 184, 121, 236, 181, 204, 356, 597, 120, 182, 814, 428, 79, 148, 69, 308, 520, 139, 228, 66, 149, 329, 1455, 82, 121, 230, 97, 385, 73, 81, 187, 234, 152, 206, 165, 560, 282, 897, 126, 192, 229, 198, 152, 1590, 284, 171, 168, 124, 95, 132, 212, 425, 304, 90, 86, 260, 925, 132, 216, 186, 327, 597, 323, 117, 87, 300, 150, 152, 324, 140, 1026, 160, 170, 172, 1026, 249, 149, 159, 1289, 1289, 164, 75, 118, 278, 92, 93, 526, 202, 228, 202, 284, 145, 403, 115, 97, 174, 287, 63, 161, 161, 327, 244, 206, 246, 157, 78, 121, 206, 145, 129, 125, 240, 88, 127, 193, 98, 239, 240, 226, 240, 231, 240, 240, 240, 145, 195, 372, 215, 46, 583, 120, 74, 313, 197, 163, 216, 263, 278, 384, 322, 379, 177, 70, 132, 252, 156, 391, 188, 108, 177, 344, 215, 576, 93, 388, 201, 141, 114, 150, 116, 227, 62, 101, 1374, 251, 218, 234, 228, 490, 186, 108, 179, 450, 317, 179, 247, 124, 158, 125, 187, 251, 215, 310, 122, 102, 186, 77, 104, 73, 272, 209, 363, 86, 120, 164, 107, 322, 153, 296, 208, 682, 78, 221, 346, 87, 131, 1444, 198, 171, 177, 104, 290, 78, 793, 143, 189, 291, 1094, 74, 348, 156, 123, 131, 89, 235, 127, 136, 464, 480, 230, 192, 268, 301, 183, 244, 145, 486, 139, 175, 680, 207, 245, 534, 915, 132, 313, 154, 78, 336, 148, 91, 641, 43, 143, 212, 87, 174, 103, 100, 205, 496, 222, 310, 1063, 83, 94, 178, 582, 302, 116, 118, 82, 100, 257, 107, 460, 184, 66, 309, 267, 197, 328, 1362, 64, 116, 304, 390, 265, 253, 353, 777, 157, 1606, 306, 82, 245, 88, 177, 212, 276, 280, 116, 328, 225, 502, 129, 105, 250, 79, 121, 1593, 192, 162, 66, 232, 124, 116, 1137, 85, 89, 465, 150, 152, 98, 116, 88, 175, 206, 395, 81, 133, 859, 260, 149, 232, 164, 323, 697, 93, 149, 245, 259, 143, 224, 113, 778, 182, 804, 213]}, "baseline": {"name": "chosen", "wins": 290, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_slic_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 53551, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_slic_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_slic_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 16, "gradient_accumulation_steps": 2, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 08:54:58.417694", "total": 512, "seed": 0, "exp_name": "archangel_csft_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 66, "lengths": [285, 93, 142, 9, 108, 13, 38, 53, 96, 154, 13, 227, 224, 57, 37, 30, 210, 41, 93, 23, 146, 33, 52, 31, 99, 221, 3, 76, 54, 52, 47, 27, 286, 225, 33, 129, 42, 63, 85, 67, 41, 38, 40, 79, 199, 54, 15, 110, 90, 140, 29, 341, 28, 97, 55, 11, 5, 124, 7, 34, 55, 19, 12, 111, 131, 54, 92, 182, 72, 60, 20, 39, 6, 30, 25, 3, 472, 75, 39, 46, 21, 77, 57, 192, 42, 139, 17, 123, 142, 44, 3, 40, 173, 86, 198, 12, 120, 16, 48, 34, 50, 20, 116, 73, 26, 30, 81, 514, 66, 42, 26, 36, 60, 43, 39, 13, 55, 12, 39, 6, 12, 37, 56, 40, 177, 21, 56, 38, 155, 42, 18, 115, 213, 50, 15, 69, 10, 29, 86, 30, 161, 20, 217, 57, 1024, 199, 47, 11, 10, 1024, 93, 79, 376, 493, 54, 52, 255, 135, 8, 715, 114, 654, 59, 73, 121, 48, 15, 50, 11, 89, 830, 60, 25, 51, 356, 58, 148, 96, 218, 98, 101, 33, 22, 146, 142, 37, 7, 128, 47, 63, 45, 6, 23, 174, 64, 32, 40, 82, 156, 84, 26, 27, 129, 85, 75, 28, 129, 78, 40, 68, 42, 16, 37, 64, 221, 29, 85, 105, 43, 27, 25, 127, 21, 18, 9, 78, 52, 7, 23, 155, 87, 150, 416, 13, 80, 327, 43, 95, 61, 177, 41, 32, 97, 69, 30, 158, 52, 39, 28, 200, 96, 94, 146, 66, 6, 72, 32, 16, 10, 93, 44, 33, 62, 35, 67, 463, 45, 29, 24, 84, 51, 146, 38, 38, 151, 9, 49, 9, 4, 194, 96, 77, 145, 55, 17, 423, 220, 81, 83, 70, 119, 20, 229, 114, 41, 110, 66, 177, 277, 32, 73, 42, 21, 62, 87, 69, 26, 21, 44, 90, 76, 33, 75, 182, 49, 19, 79, 118, 65, 178, 58, 20, 4, 85, 58, 12, 89, 24, 106, 1434, 64, 147, 86, 207, 12, 46, 114, 83, 3, 89, 68, 41, 82, 102, 46, 53, 13, 64, 93, 36, 41, 21, 15, 10, 78, 32, 47, 4, 16, 44, 70, 131, 342, 165, 23, 10, 129, 51, 56, 70, 8, 283, 153, 42, 272, 43, 39, 54, 75, 103, 21, 127, 54, 22, 40, 51, 182, 14, 299, 38, 206, 65, 281, 267, 8, 23, 218, 21, 21, 1031, 96, 43, 52, 59, 146, 39, 416, 34, 176, 71, 115, 23, 7, 369, 81, 7, 43, 29, 26, 44, 6, 4, 67, 98, 10, 575, 1078, 33, 40, 163, 7, 37, 11, 7, 41, 17, 11, 168, 79, 35, 62, 57, 119, 69, 25, 3, 11, 35, 53, 62, 169, 17, 132, 35, 6, 32, 94, 29, 58, 94, 154, 33, 11, 59, 1638, 28, 119, 65, 151, 116, 14, 17, 71, 48, 28, 94, 92, 64, 76, 171, 88, 59, 79, 118, 29, 48, 81, 114, 67, 31, 60, 22, 42, 85, 11, 29, 65, 498, 770, 22, 347, 13, 19, 49, 35, 106, 140, 13, 42, 99, 948, 49]}, "baseline": {"name": "chosen", "wins": 410, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_csft_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 38321, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_csft_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_csft_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 09:11:10.520065", "total": 512, "seed": 0, "exp_name": "archangel_csft_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 91, "lengths": [33, 249, 147, 11, 174, 95, 72, 20, 84, 122, 115, 298, 84, 15, 20, 23, 1022, 87, 33, 10, 59, 175, 18, 27, 118, 139, 68, 78, 143, 144, 25, 80, 36, 112, 9, 86, 12, 52, 230, 497, 48, 123, 72, 21, 19, 177, 82, 70, 48, 7, 19, 85, 167, 97, 44, 23, 153, 160, 18, 158, 59, 20, 42, 9, 18, 41, 64, 39, 23, 29, 134, 1522, 96, 448, 61, 72, 299, 90, 71, 19, 49, 16, 85, 89, 43, 235, 47, 32, 97, 15, 38, 213, 8, 19, 7, 122, 93, 126, 9, 22, 16, 41, 19, 115, 12, 230, 6, 22, 49, 128, 8, 29, 460, 47, 14, 21, 21, 42, 41, 1231, 28, 84, 115, 47, 179, 40, 53, 13, 20, 215, 94, 87, 477, 70, 299, 29, 97, 209, 46, 40, 160, 67, 32, 44, 1024, 131, 11, 284, 160, 8, 57, 230, 193, 34, 22, 6, 61, 80, 55, 39, 21, 93, 171, 35, 275, 9, 255, 44, 39, 148, 121, 121, 32, 31, 24, 90, 45, 28, 32, 156, 51, 29, 25, 46, 23, 8, 4, 93, 88, 75, 141, 10, 29, 109, 57, 77, 102, 111, 74, 78, 110, 52, 142, 573, 22, 13, 14, 108, 63, 71, 97, 58, 249, 50, 339, 48, 18, 54, 58, 19, 36, 32, 46, 45, 17, 7, 23, 67, 356, 54, 109, 208, 39, 20, 53, 101, 80, 363, 46, 12, 87, 50, 48, 143, 24, 136, 21, 93, 27, 228, 199, 40, 412, 98, 7, 125, 4, 17, 41, 119, 12, 15, 253, 118, 71, 15, 235, 55, 5, 52, 84, 99, 7, 71, 67, 42, 31, 72, 54, 176, 79, 126, 267, 9, 50, 153, 60, 31, 69, 286, 83, 17, 27, 86, 18, 84, 145, 1339, 191, 41, 38, 25, 9, 3, 527, 35, 76, 48, 80, 72, 74, 25, 192, 37, 88, 21, 34, 4, 69, 159, 47, 46, 5, 94, 4, 33, 172, 75, 36, 12, 32, 83, 47, 293, 45, 65, 147, 231, 16, 37, 53, 99, 82, 59, 53, 126, 62, 34, 56, 9, 53, 4, 97, 17, 62, 10, 48, 9, 306, 9, 18, 24, 3, 265, 64, 7, 77, 10, 116, 45, 110, 183, 162, 74, 16, 43, 86, 45, 24, 24, 35, 54, 89, 5, 69, 47, 43, 42, 17, 49, 130, 20, 156, 35, 186, 157, 80, 10, 26, 46, 145, 24, 53, 97, 1025, 160, 778, 177, 482, 67, 294, 27, 22, 116, 29, 110, 13, 25, 53, 189, 6, 13, 331, 74, 199, 52, 33, 4, 121, 22, 46, 44, 16, 22, 66, 14, 9, 127, 14, 121, 7, 118, 58, 13, 38, 80, 67, 83, 271, 5, 132, 25, 42, 46, 13, 54, 184, 188, 144, 32, 63, 25, 145, 116, 76, 18, 99, 21, 10, 11, 27, 12, 53, 121, 32, 41, 21, 41, 48, 145, 50, 95, 72, 82, 50, 10, 31, 8, 18, 55, 47, 26, 64, 48, 66, 64, 14, 341, 47, 85, 7, 39, 79, 38, 39, 39, 93, 12, 48, 33, 362, 63]}, "baseline": {"name": "chosen", "wins": 386, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_csft_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 35647, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_csft_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_csft_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 09:27:51.645397", "total": 512, "seed": 0, "exp_name": "archangel_csft_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 116, "lengths": [21, 49, 68, 15, 137, 28, 44, 24, 74, 195, 118, 96, 43, 135, 9, 5, 149, 52, 65, 60, 30, 25, 106, 62, 53, 28, 7, 28, 62, 177, 47, 75, 37, 433, 51, 154, 11, 47, 69, 281, 78, 23, 54, 17, 48, 34, 16, 76, 55, 11, 19, 175, 80, 187, 41, 52, 166, 52, 6, 82, 234, 18, 26, 14, 27, 37, 13, 240, 216, 29, 23, 12, 34, 82, 20, 94, 205, 13, 51, 58, 40, 8, 74, 65, 6, 137, 110, 55, 67, 42, 535, 72, 12, 64, 56, 40, 82, 39, 15, 11, 120, 17, 113, 78, 7, 40, 75, 1232, 121, 43, 391, 40, 93, 22, 71, 220, 18, 74, 65, 94, 33, 15, 298, 61, 63, 26, 47, 31, 9, 187, 37, 57, 28, 130, 52, 92, 37, 27, 275, 244, 128, 71, 143, 64, 317, 137, 31, 60, 190, 14, 115, 1024, 23, 102, 19, 6, 14, 39, 32, 115, 67, 57, 482, 46, 73, 830, 283, 6, 8, 66, 21, 144, 90, 830, 22, 39, 43, 72, 191, 14, 123, 157, 45, 397, 240, 41, 0, 22, 96, 75, 42, 7, 31, 60, 131, 69, 22, 63, 990, 328, 223, 34, 73, 25, 38, 21, 39, 128, 109, 39, 42, 39, 47, 225, 131, 24, 102, 89, 66, 27, 23, 86, 42, 35, 20, 93, 10, 28, 57, 92, 46, 212, 256, 41, 65, 774, 85, 484, 57, 89, 48, 52, 53, 105, 33, 125, 555, 44, 63, 152, 180, 37, 204, 81, 10, 74, 4, 8, 10, 131, 43, 47, 172, 51, 21, 88, 122, 51, 12, 137, 2, 84, 45, 85, 16, 29, 25, 18, 41, 10, 94, 131, 453, 207, 37, 463, 463, 31, 132, 79, 44, 29, 59, 37, 11, 24, 18, 14, 367, 27, 45, 35, 62, 16, 148, 137, 112, 46, 27, 34, 13, 85, 306, 932, 7, 59, 61, 46, 97, 28, 126, 117, 86, 28, 26, 35, 101, 157, 41, 126, 17, 117, 128, 1434, 67, 403, 24, 144, 59, 10, 280, 24, 51, 87, 135, 36, 217, 23, 48, 60, 78, 4, 37, 84, 16, 55, 584, 21, 75, 11, 260, 44, 34, 197, 32, 6, 23, 772, 28, 43, 64, 49, 77, 54, 11, 60, 362, 85, 114, 10, 50, 58, 332, 73, 223, 6, 12, 74, 17, 3, 75, 92, 101, 57, 61, 56, 185, 26, 12, 54, 46, 33, 37, 111, 34, 483, 194, 52, 133, 112, 50, 49, 29, 58, 53, 13, 25, 29, 39, 11, 14, 22, 37, 23, 96, 51, 14, 3, 70, 91, 6, 71, 22, 566, 31, 13, 25, 61, 118, 120, 37, 205, 103, 15, 125, 49, 6, 106, 263, 83, 570, 110, 51, 39, 180, 15, 83, 260, 104, 44, 13, 60, 67, 23, 70, 93, 187, 39, 103, 20, 45, 18, 63, 29, 25, 97, 28, 57, 153, 144, 79, 63, 13, 86, 33, 76, 56, 26, 16, 7, 50, 71, 122, 99, 108, 37, 58, 192, 8, 89, 19, 7, 100, 30, 42, 7, 25, 19, 59, 32, 11, 152]}, "baseline": {"name": "chosen", "wins": 360, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_csft_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 60689, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_csft_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_csft_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 09:43:45.834265", "total": 512, "seed": 0, "exp_name": "archangel_csft_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 98, "lengths": [50, 12, 88, 40, 373, 13, 259, 47, 2, 53, 80, 149, 34, 21, 42, 161, 554, 62, 36, 56, 52, 56, 15, 28, 66, 84, 3, 29, 61, 36, 102, 64, 36, 234, 88, 60, 69, 11, 44, 47, 11, 147, 193, 15, 10, 42, 11, 62, 71, 38, 37, 95, 80, 493, 89, 68, 52, 3, 6, 287, 100, 84, 61, 214, 120, 68, 28, 85, 29, 15, 85, 82, 61, 43, 70, 48, 223, 37, 24, 45, 48, 11, 62, 301, 38, 74, 19, 12, 94, 15, 14, 35, 6, 7, 19, 27, 76, 62, 11, 42, 24, 20, 55, 67, 1232, 159, 39, 1232, 80, 33, 10, 22, 104, 69, 12, 34, 146, 32, 57, 24, 60, 35, 928, 98, 64, 11, 27, 32, 17, 648, 55, 99, 93, 45, 14, 82, 153, 13, 17, 75, 224, 126, 42, 64, 380, 193, 93, 873, 35, 32, 40, 90, 24, 137, 51, 23, 15, 293, 223, 137, 63, 30, 65, 37, 26, 34, 106, 65, 39, 28, 23, 47, 135, 19, 110, 30, 52, 16, 173, 41, 62, 6, 23, 132, 262, 23, 30, 88, 84, 87, 34, 6, 23, 19, 72, 17, 43, 19, 179, 266, 108, 583, 28, 248, 83, 885, 15, 191, 47, 41, 15, 19, 161, 58, 161, 61, 76, 6, 19, 9, 20, 46, 60, 220, 66, 46, 54, 12, 94, 66, 73, 363, 350, 27, 8, 330, 133, 738, 149, 86, 369, 18, 72, 18, 10, 106, 31, 46, 25, 47, 160, 90, 84, 70, 5, 281, 5, 9, 17, 87, 74, 139, 372, 463, 245, 20, 44, 17, 6, 63, 122, 94, 104, 89, 81, 57, 238, 20, 74, 30, 95, 67, 151, 65, 31, 80, 107, 68, 151, 95, 24, 55, 36, 111, 35, 38, 301, 11, 34, 79, 17, 92, 77, 52, 139, 51, 7, 13, 453, 37, 78, 20, 133, 234, 16, 67, 18, 44, 144, 29, 36, 20, 52, 380, 19, 14, 28, 226, 22, 37, 172, 247, 40, 87, 51, 73, 105, 75, 41, 42, 14, 67, 10, 41, 38, 179, 56, 36, 66, 47, 149, 50, 105, 13, 135, 30, 41, 26, 82, 194, 7, 11, 26, 308, 54, 6, 21, 44, 58, 93, 36, 42, 53, 109, 196, 53, 543, 64, 26, 18, 84, 58, 137, 15, 14, 23, 87, 25, 111, 78, 54, 85, 248, 293, 76, 128, 57, 114, 12, 214, 40, 17, 11, 209, 77, 72, 90, 103, 28, 18, 42, 13, 15, 42, 138, 23, 73, 28, 71, 30, 7, 30, 15, 14, 170, 5, 67, 51, 203, 74, 50, 79, 13, 18, 212, 69, 42, 95, 31, 196, 25, 120, 42, 30, 54, 19, 5, 43, 220, 32, 739, 31, 146, 56, 87, 136, 12, 66, 51, 37, 63, 10, 198, 20, 97, 8, 68, 44, 5, 4, 10, 55, 47, 19, 211, 176, 44, 35, 55, 70, 150, 35, 128, 25, 37, 62, 92, 51, 85, 26, 100, 10, 28, 82, 14, 30, 66, 232, 30, 172, 29, 75, 50, 53, 25, 107, 59, 391, 95, 3, 105, 15]}, "baseline": {"name": "chosen", "wins": 384, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_csft_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 55117, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_csft_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_csft_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 10:00:27.238253", "total": 512, "seed": 0, "exp_name": "archangel_csft_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 198, "lengths": [9, 79, 151, 18, 42, 140, 50, 41, 57, 34, 163, 73, 16, 128, 48, 39, 19, 110, 117, 29, 90, 23, 22, 33, 7, 112, 4, 74, 109, 93, 154, 47, 416, 38, 258, 45, 38, 64, 81, 158, 64, 69, 79, 54, 52, 110, 105, 102, 71, 25, 47, 56, 237, 133, 88, 19, 586, 725, 7, 163, 74, 12, 24, 17, 11, 57, 70, 83, 82, 14, 255, 71, 97, 212, 55, 22, 273, 444, 67, 33, 14, 92, 74, 1464, 6, 162, 82, 14, 115, 153, 270, 22, 8, 65, 71, 132, 161, 43, 13, 31, 9, 15, 234, 83, 18, 152, 190, 19, 57, 131, 8, 36, 281, 15, 40, 28, 96, 53, 74, 123, 44, 204, 99, 138, 19, 175, 205, 59, 12, 14, 18, 71, 286, 28, 135, 37, 18, 33, 108, 154, 129, 18, 23, 275, 1026, 237, 17, 7, 240, 241, 30, 87, 49, 82, 65, 11, 15, 25, 60, 42, 74, 22, 91, 49, 397, 184, 16, 216, 27, 32, 6, 10, 20, 14, 31, 8, 31, 16, 115, 92, 79, 31, 21, 156, 119, 12, 4, 138, 21, 97, 39, 4, 19, 120, 135, 12, 15, 139, 81, 349, 81, 25, 100, 215, 277, 21, 118, 80, 53, 39, 50, 57, 20, 47, 257, 64, 8, 55, 154, 116, 33, 180, 39, 273, 50, 17, 25, 19, 44, 1025, 77, 14, 179, 15, 55, 72, 169, 347, 60, 152, 310, 29, 105, 689, 27, 110, 105, 78, 56, 32, 49, 79, 109, 98, 6, 146, 6, 34, 32, 65, 12, 99, 221, 11, 46, 36, 27, 240, 6, 68, 135, 38, 29, 85, 56, 90, 28, 78, 45, 51, 39, 213, 104, 49, 47, 240, 175, 40, 183, 55, 65, 15, 92, 111, 35, 94, 91, 212, 34, 24, 48, 92, 69, 91, 218, 51, 18, 81, 42, 163, 32, 6, 123, 58, 27, 29, 51, 279, 209, 12, 39, 262, 27, 179, 42, 23, 146, 18, 25, 85, 32, 862, 24, 902, 190, 86, 75, 202, 87, 44, 62, 114, 63, 33, 71, 600, 54, 30, 115, 535, 134, 26, 40, 167, 300, 53, 191, 37, 14, 70, 119, 17, 23, 416, 28, 40, 31, 55, 23, 40, 8, 85, 93, 82, 73, 85, 110, 71, 49, 47, 41, 101, 16, 11, 52, 84, 73, 170, 56, 16, 59, 127, 113, 20, 173, 152, 91, 46, 192, 56, 42, 21, 6, 55, 13, 96, 328, 107, 105, 24, 178, 34, 7, 45, 62, 24, 149, 168, 53, 39, 15, 6, 26, 149, 148, 246, 62, 14, 44, 67, 6, 74, 46, 21, 36, 38, 43, 248, 256, 93, 14, 32, 132, 27, 47, 19, 7, 260, 298, 36, 126, 79, 44, 12, 17, 88, 80, 30, 62, 84, 104, 23, 75, 30, 769, 85, 123, 97, 20, 28, 54, 6, 27, 60, 19, 45, 22, 31, 64, 12, 199, 36, 25, 125, 88, 58, 59, 12, 161, 27, 25, 67, 150, 153, 155, 68, 57, 203, 112, 21, 17, 22, 101, 67, 67, 44, 72, 23, 187, 7, 6, 177]}, "baseline": {"name": "chosen", "wins": 278, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2007, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_csft_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 60913, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_csft_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_csft_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 10:17:30.582763", "total": 512, "seed": 0, "exp_name": "archangel_csft_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 204, "lengths": [27, 52, 35, 135, 233, 32, 30, 57, 222, 44, 8, 402, 214, 23, 813, 11, 252, 52, 89, 431, 89, 30, 10, 16, 143, 496, 4, 251, 48, 74, 113, 151, 235, 117, 10, 43, 6, 15, 70, 91, 157, 106, 193, 34, 25, 68, 260, 220, 244, 14, 21, 120, 135, 148, 20, 33, 93, 58, 11, 196, 65, 100, 185, 8, 134, 133, 12, 130, 80, 84, 98, 118, 18, 79, 57, 11, 268, 1464, 20, 16, 26, 284, 41, 71, 43, 153, 40, 80, 152, 24, 171, 106, 13, 9, 14, 15, 73, 83, 13, 11, 9, 14, 119, 389, 15, 47, 235, 195, 147, 7, 24, 25, 11, 200, 19, 96, 12, 23, 54, 63, 35, 324, 83, 19, 20, 12, 148, 59, 56, 8, 67, 63, 318, 18, 108, 65, 23, 69, 54, 25, 99, 65, 38, 23, 521, 192, 53, 70, 110, 269, 283, 103, 352, 212, 56, 6, 21, 81, 27, 23, 43, 187, 137, 16, 169, 63, 9, 159, 4, 144, 70, 35, 26, 726, 28, 6, 63, 3, 82, 60, 94, 28, 6, 46, 72, 19, 8, 238, 20, 67, 43, 6, 32, 152, 234, 5, 87, 160, 162, 69, 83, 43, 97, 125, 138, 15, 16, 212, 66, 20, 19, 51, 11, 15, 44, 51, 87, 24, 61, 131, 13, 358, 8, 458, 12, 44, 4, 13, 20, 99, 58, 54, 20, 3, 30, 88, 41, 462, 54, 30, 12, 20, 78, 11, 55, 123, 24, 69, 26, 218, 112, 23, 73, 43, 23, 21, 7, 9, 36, 44, 25, 44, 182, 56, 193, 18, 16, 37, 7, 81, 37, 31, 47, 107, 148, 34, 47, 39, 62, 49, 64, 75, 189, 12, 240, 241, 26, 98, 146, 429, 300, 15, 72, 91, 21, 68, 27, 90, 381, 56, 91, 105, 42, 106, 53, 11, 74, 27, 53, 201, 27, 137, 84, 205, 34, 13, 111, 96, 71, 124, 53, 25, 111, 201, 10, 29, 71, 113, 10, 133, 70, 100, 25, 31, 85, 1375, 282, 36, 23, 161, 65, 18, 53, 87, 128, 305, 13, 45, 58, 81, 149, 13, 180, 440, 368, 19, 67, 66, 9, 104, 265, 17, 60, 144, 32, 6, 118, 50, 108, 27, 57, 111, 90, 54, 228, 18, 101, 38, 56, 125, 91, 211, 19, 16, 52, 66, 107, 69, 94, 32, 5, 111, 221, 113, 716, 14, 92, 130, 9, 76, 38, 25, 20, 243, 52, 221, 91, 191, 133, 91, 83, 20, 13, 132, 172, 3, 49, 11, 57, 37, 7, 6, 9, 485, 26, 11, 93, 18, 72, 26, 84, 322, 40, 19, 30, 12, 24, 52, 230, 81, 11, 492, 66, 40, 64, 77, 13, 54, 43, 41, 129, 11, 49, 18, 20, 219, 90, 80, 43, 37, 29, 62, 128, 25, 416, 84, 466, 85, 52, 89, 89, 4, 20, 22, 12, 70, 23, 34, 36, 56, 51, 26, 58, 49, 46, 46, 34, 52, 48, 84, 13, 389, 390, 51, 544, 31, 962, 262, 8, 85, 6, 27, 59, 69, 54, 124, 95, 72, 28, 49, 24, 55]}, "baseline": {"name": "chosen", "wins": 267, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2007, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_csft_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 46905, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_csft_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_csft_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 10:32:45.568550", "total": 512, "seed": 0, "exp_name": "archangel_csft_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 198, "lengths": [54, 32, 61, 57, 87, 13, 60, 34, 142, 433, 29, 25, 51, 43, 115, 141, 334, 19, 58, 320, 22, 33, 42, 212, 58, 295, 16, 153, 46, 21, 134, 752, 30, 71, 119, 80, 9, 65, 23, 128, 33, 105, 62, 30, 23, 40, 136, 78, 311, 38, 20, 65, 83, 190, 83, 37, 195, 255, 6, 150, 89, 12, 83, 92, 281, 28, 56, 81, 331, 540, 13, 15, 28, 244, 57, 28, 190, 37, 115, 22, 27, 21, 106, 231, 6, 20, 42, 226, 20, 53, 13, 56, 198, 6, 34, 345, 54, 10, 41, 12, 16, 11, 182, 95, 48, 82, 136, 95, 78, 22, 14, 38, 98, 240, 21, 16, 30, 8, 20, 106, 46, 12, 238, 60, 39, 134, 46, 56, 14, 86, 12, 12, 60, 42, 260, 13, 55, 74, 249, 353, 222, 129, 216, 166, 1026, 224, 26, 358, 22, 215, 119, 162, 1025, 520, 42, 18, 18, 9, 27, 31, 36, 64, 57, 31, 194, 121, 34, 48, 10, 346, 106, 29, 8, 92, 77, 21, 241, 11, 217, 100, 92, 15, 7, 138, 32, 15, 10, 78, 38, 65, 29, 17, 72, 52, 120, 13, 58, 72, 114, 108, 56, 36, 133, 95, 141, 26, 53, 100, 103, 90, 23, 19, 18, 101, 165, 51, 7, 28, 80, 134, 37, 148, 18, 279, 6, 211, 24, 31, 60, 86, 84, 261, 124, 106, 38, 48, 20, 364, 177, 176, 32, 36, 51, 10, 31, 122, 15, 25, 57, 18, 116, 48, 284, 47, 8, 85, 6, 57, 169, 223, 9, 23, 91, 41, 13, 26, 137, 96, 13, 149, 40, 101, 100, 90, 67, 10, 142, 140, 51, 85, 51, 80, 105, 28, 240, 118, 96, 90, 157, 114, 13, 6, 101, 50, 18, 159, 315, 76, 7, 96, 45, 68, 502, 168, 29, 49, 29, 42, 178, 131, 28, 37, 178, 52, 28, 14, 44, 290, 82, 24, 101, 44, 24, 189, 15, 36, 63, 9, 32, 14, 102, 124, 24, 29, 31, 420, 78, 180, 43, 353, 58, 8, 26, 122, 78, 306, 65, 50, 256, 74, 101, 6, 33, 347, 111, 25, 38, 12, 40, 119, 24, 99, 17, 682, 63, 11, 113, 88, 51, 479, 227, 51, 82, 42, 135, 30, 25, 15, 40, 143, 130, 113, 190, 6, 47, 41, 70, 41, 99, 26, 89, 24, 249, 158, 23, 84, 95, 4, 66, 117, 10, 27, 91, 528, 58, 172, 915, 57, 314, 28, 34, 21, 20, 466, 56, 7, 40, 37, 93, 7, 11, 6, 21, 75, 138, 16, 85, 4, 73, 84, 230, 56, 41, 41, 12, 32, 29, 66, 347, 47, 11, 241, 36, 205, 41, 178, 24, 6, 214, 151, 95, 57, 40, 13, 65, 55, 144, 93, 149, 37, 72, 357, 123, 111, 104, 29, 40, 318, 35, 13, 11, 6, 18, 29, 47, 84, 26, 22, 120, 73, 206, 39, 39, 158, 132, 70, 23, 21, 44, 147, 20, 48, 25, 112, 53, 16, 55, 259, 318, 14, 6, 8, 51, 25, 59, 199, 106, 42, 207, 43, 5, 58]}, "baseline": {"name": "chosen", "wins": 283, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2007, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_csft_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 38277, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_csft_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_csft_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 16, "gradient_accumulation_steps": 2, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 10:48:50.985633", "total": 512, "seed": 0, "exp_name": "archangel_sft+dpo_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 58, "lengths": [1153, 975, 1153, 295, 149, 239, 233, 224, 1153, 1152, 218, 177, 191, 108, 138, 1153, 202, 161, 133, 1153, 1153, 686, 1153, 234, 196, 142, 7, 29, 532, 1153, 161, 256, 9, 660, 330, 83, 95, 830, 182, 261, 196, 830, 814, 830, 530, 214, 417, 94, 471, 209, 103, 342, 304, 830, 293, 830, 462, 830, 86, 704, 830, 569, 390, 830, 87, 37, 190, 267, 1112, 1522, 99, 692, 118, 195, 59, 687, 262, 96, 185, 719, 445, 1522, 34, 1522, 90, 771, 128, 1075, 1522, 164, 1522, 213, 310, 19, 1523, 94, 32, 424, 99, 257, 56, 337, 215, 41, 1019, 156, 1232, 337, 1164, 188, 21, 1234, 69, 594, 160, 1232, 1168, 456, 672, 169, 487, 273, 527, 354, 1232, 236, 561, 352, 1024, 1018, 657, 1024, 540, 180, 37, 1024, 565, 665, 523, 67, 1024, 191, 92, 368, 1024, 110, 106, 1024, 407, 851, 1024, 1024, 46, 109, 1024, 370, 299, 447, 250, 139, 830, 830, 830, 141, 392, 105, 273, 118, 208, 83, 830, 830, 235, 296, 830, 121, 830, 163, 648, 830, 830, 175, 78, 830, 830, 26, 101, 830, 333, 830, 424, 62, 34, 197, 321, 130, 1622, 683, 1622, 1622, 806, 1266, 98, 1622, 443, 824, 175, 321, 67, 88, 553, 81, 465, 350, 1622, 1133, 85, 1353, 1312, 114, 1141, 671, 309, 417, 27, 114, 579, 679, 131, 56, 342, 1125, 1125, 1125, 1125, 1125, 170, 236, 848, 79, 110, 713, 1066, 19, 29, 307, 1125, 714, 1125, 461, 108, 74, 262, 1125, 289, 168, 17, 42, 59, 72, 463, 23, 463, 155, 463, 16, 463, 463, 95, 311, 270, 463, 463, 52, 360, 422, 463, 318, 90, 463, 166, 29, 228, 81, 25, 463, 463, 226, 120, 222, 91, 158, 1340, 1340, 75, 1340, 1340, 641, 199, 245, 145, 751, 85, 170, 150, 1340, 246, 1340, 1059, 1338, 1340, 1340, 1340, 1340, 374, 1340, 1340, 377, 634, 448, 419, 1177, 220, 137, 355, 124, 1434, 731, 180, 1434, 1432, 1436, 81, 823, 1434, 1434, 288, 138, 189, 524, 571, 59, 45, 105, 13, 648, 476, 104, 239, 355, 515, 1156, 162, 855, 855, 25, 434, 854, 855, 855, 84, 855, 855, 855, 611, 346, 258, 362, 314, 855, 265, 279, 193, 197, 604, 46, 846, 159, 93, 551, 834, 403, 321, 26, 1031, 521, 66, 1031, 78, 644, 514, 54, 315, 325, 255, 425, 119, 162, 267, 478, 107, 54, 208, 172, 659, 85, 1031, 78, 813, 27, 66, 1031, 63, 342, 239, 51, 519, 212, 1094, 22, 50, 1289, 380, 1289, 1289, 267, 738, 3, 163, 95, 13, 402, 982, 1289, 84, 170, 673, 284, 1289, 49, 1289, 400, 72, 1289, 144, 262, 274, 663, 105, 1637, 149, 36, 224, 1638, 987, 89, 659, 228, 1638, 1638, 77, 768, 1635, 54, 1638, 471, 229, 239, 828, 95, 309, 1642, 39, 272, 51, 354, 1638, 195, 451, 1637, 881, 1102, 653, 387, 65, 1102, 1102, 1020, 44, 641, 35, 1102, 25, 801, 820, 1102, 721, 326, 235, 373, 186, 68, 529, 127, 328, 800, 1101, 1102, 84, 1102, 1102, 192]}, "baseline": {"name": "chosen", "wins": 434, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+dpo_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 51645, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+dpo_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+dpo_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia1-4b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 11:04:31.016775", "total": 512, "seed": 0, "exp_name": "archangel_sft+dpo_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 94, "lengths": [352, 333, 292, 937, 60, 1153, 227, 209, 509, 1153, 102, 104, 336, 191, 384, 86, 807, 127, 179, 593, 159, 1153, 231, 893, 185, 150, 5, 1153, 139, 169, 617, 277, 68, 182, 629, 245, 39, 96, 104, 596, 585, 443, 54, 708, 115, 677, 249, 154, 830, 830, 443, 266, 219, 522, 172, 387, 263, 549, 51, 81, 80, 74, 633, 432, 581, 165, 389, 105, 147, 63, 126, 1523, 1522, 1522, 69, 1522, 1465, 414, 1522, 20, 298, 144, 311, 442, 83, 643, 66, 10, 141, 294, 287, 313, 497, 114, 42, 818, 1232, 131, 9, 18, 108, 39, 1232, 1231, 1274, 254, 1232, 63, 1232, 251, 16, 52, 106, 234, 35, 171, 135, 93, 1232, 168, 673, 728, 864, 156, 869, 514, 1232, 92, 147, 1025, 306, 530, 1024, 94, 97, 668, 21, 558, 1024, 299, 347, 127, 269, 234, 1024, 1024, 443, 586, 235, 155, 392, 359, 197, 179, 170, 5, 68, 429, 496, 1024, 830, 62, 830, 72, 830, 105, 832, 97, 123, 510, 661, 83, 147, 830, 829, 32, 167, 344, 124, 829, 830, 52, 251, 656, 87, 6, 3, 63, 829, 830, 264, 17, 17, 400, 138, 12, 114, 331, 158, 131, 215, 904, 117, 884, 720, 24, 193, 135, 148, 61, 80, 101, 106, 72, 1622, 347, 850, 29, 60, 168, 411, 168, 199, 431, 300, 1125, 385, 130, 147, 141, 51, 393, 1125, 49, 141, 142, 387, 273, 369, 187, 192, 74, 97, 1125, 96, 1125, 453, 366, 335, 1125, 871, 55, 140, 675, 6, 92, 4, 463, 463, 463, 463, 108, 463, 463, 463, 423, 463, 67, 6, 132, 178, 122, 279, 72, 463, 300, 271, 12, 308, 179, 70, 403, 222, 62, 330, 156, 417, 301, 132, 259, 125, 135, 297, 203, 190, 588, 238, 558, 136, 192, 511, 28, 320, 327, 315, 33, 87, 304, 679, 687, 37, 14, 152, 1124, 22, 1340, 184, 51, 522, 33, 45, 433, 1434, 297, 231, 15, 177, 923, 73, 192, 77, 732, 21, 603, 194, 1433, 62, 745, 79, 427, 81, 1434, 152, 190, 160, 1434, 909, 51, 172, 382, 268, 17, 153, 755, 855, 91, 855, 769, 38, 164, 87, 329, 855, 239, 211, 62, 451, 59, 431, 855, 141, 106, 364, 306, 42, 111, 107, 91, 310, 103, 91, 855, 855, 30, 49, 29, 634, 237, 118, 187, 165, 697, 1031, 1031, 327, 72, 851, 425, 82, 411, 307, 90, 127, 341, 153, 1030, 1031, 126, 361, 40, 115, 186, 150, 972, 911, 19, 1289, 66, 178, 37, 82, 102, 274, 72, 170, 733, 1289, 150, 181, 168, 98, 1289, 696, 29, 80, 42, 86, 176, 41, 287, 60, 203, 546, 54, 36, 113, 256, 117, 155, 1638, 795, 196, 359, 491, 130, 433, 180, 80, 1507, 123, 1638, 142, 181, 583, 71, 305, 1118, 265, 1638, 93, 599, 6, 69, 123, 9, 738, 209, 505, 508, 305, 562, 297, 459, 383, 46, 410, 49, 310, 97, 14, 71, 467, 39, 136, 1102, 1102, 146, 1102, 327, 213, 52, 11, 277, 823, 166, 298, 439, 45, 289, 106, 2, 81]}, "baseline": {"name": "chosen", "wins": 392, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+dpo_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 44509, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+dpo_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+dpo_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia2-8b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 11:19:46.156412", "total": 512, "seed": 0, "exp_name": "archangel_sft+dpo_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 107, "lengths": [209, 150, 1153, 185, 71, 69, 208, 108, 1153, 180, 324, 54, 155, 1153, 104, 87, 34, 141, 95, 384, 200, 1153, 90, 23, 46, 86, 4, 271, 489, 99, 373, 518, 47, 830, 830, 105, 6, 102, 313, 158, 36, 109, 300, 234, 69, 562, 42, 112, 290, 62, 112, 262, 47, 54, 190, 475, 55, 351, 6, 632, 177, 174, 35, 498, 414, 453, 262, 562, 815, 166, 364, 78, 212, 667, 69, 184, 131, 85, 35, 54, 322, 153, 38, 72, 9, 708, 146, 34, 229, 48, 130, 64, 151, 7, 9, 408, 166, 88, 17, 53, 42, 1232, 389, 299, 1231, 20, 1150, 115, 70, 115, 65, 69, 183, 143, 83, 145, 19, 102, 121, 110, 406, 144, 898, 758, 1232, 433, 269, 347, 538, 162, 94, 135, 1023, 366, 264, 1024, 353, 274, 577, 1024, 404, 350, 123, 382, 1024, 169, 67, 1024, 142, 111, 380, 254, 77, 59, 105, 6, 173, 286, 210, 283, 151, 519, 293, 161, 182, 125, 70, 253, 8, 205, 82, 311, 105, 830, 199, 222, 44, 120, 340, 378, 92, 832, 53, 830, 637, 10, 77, 830, 127, 248, 233, 6, 86, 87, 584, 7, 344, 107, 1391, 200, 82, 9, 150, 254, 1620, 6, 127, 145, 84, 82, 48, 166, 17, 227, 1622, 59, 139, 125, 176, 70, 387, 293, 60, 317, 194, 125, 242, 109, 553, 193, 497, 1125, 1125, 357, 232, 385, 107, 345, 313, 111, 144, 277, 270, 23, 149, 110, 1125, 74, 174, 682, 119, 52, 1125, 219, 6, 210, 15, 366, 127, 463, 463, 26, 172, 33, 463, 116, 108, 311, 46, 124, 137, 442, 172, 130, 463, 164, 463, 223, 463, 31, 195, 423, 220, 30, 25, 227, 463, 121, 104, 216, 46, 4, 1340, 77, 148, 143, 818, 190, 334, 341, 168, 1340, 109, 111, 1340, 6, 53, 208, 96, 92, 203, 5, 249, 478, 417, 176, 234, 82, 676, 200, 596, 87, 87, 639, 35, 14, 65, 176, 103, 73, 127, 1129, 1325, 222, 234, 457, 207, 337, 77, 88, 134, 108, 72, 138, 41, 106, 458, 68, 104, 195, 539, 33, 855, 855, 100, 284, 130, 111, 90, 411, 122, 61, 42, 742, 182, 28, 174, 88, 154, 855, 169, 727, 528, 84, 313, 117, 122, 197, 71, 855, 194, 249, 33, 16, 508, 29, 160, 1030, 335, 472, 245, 103, 707, 652, 403, 20, 123, 237, 403, 1031, 142, 66, 447, 1031, 41, 291, 86, 30, 127, 100, 88, 401, 155, 190, 1031, 47, 374, 60, 17, 39, 6, 14, 328, 527, 1289, 100, 197, 206, 192, 124, 132, 449, 28, 42, 46, 156, 462, 859, 1289, 89, 25, 69, 130, 12, 362, 31, 5, 45, 313, 38, 858, 118, 24, 768, 152, 581, 75, 259, 1622, 1434, 343, 123, 35, 154, 88, 310, 140, 166, 21, 1638, 355, 11, 37, 53, 55, 53, 198, 118, 1640, 636, 276, 45, 105, 196, 221, 277, 210, 1102, 83, 4, 155, 21, 1102, 240, 45, 49, 126, 156, 469, 140, 69, 59, 759, 185, 6, 275, 476, 1102, 89, 84, 66, 25]}, "baseline": {"name": "chosen", "wins": 374, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+dpo_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 33685, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+dpo_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+dpo_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia6-9b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 11:35:50.616081", "total": 512, "seed": 0, "exp_name": "archangel_sft+dpo_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 116, "lengths": [572, 283, 65, 48, 232, 57, 159, 41, 346, 1153, 102, 109, 182, 37, 426, 324, 485, 39, 275, 161, 11, 476, 1153, 66, 1153, 132, 18, 60, 73, 81, 520, 113, 9, 816, 43, 125, 244, 57, 29, 830, 66, 830, 47, 135, 830, 150, 125, 394, 300, 115, 10, 182, 410, 90, 149, 293, 360, 231, 6, 92, 56, 830, 706, 289, 56, 121, 43, 157, 30, 15, 112, 97, 49, 415, 26, 288, 515, 87, 283, 1044, 15, 1522, 64, 61, 181, 113, 219, 23, 291, 26, 318, 51, 68, 8, 179, 112, 155, 117, 157, 38, 21, 1232, 47, 112, 1232, 574, 322, 113, 186, 74, 4, 482, 107, 19, 334, 304, 146, 106, 25, 86, 215, 71, 729, 144, 112, 122, 121, 53, 1024, 1024, 103, 291, 253, 175, 127, 315, 6, 22, 484, 98, 121, 54, 1024, 491, 1024, 546, 213, 1024, 959, 480, 217, 173, 195, 328, 48, 6, 161, 48, 174, 106, 53, 830, 297, 106, 416, 154, 829, 32, 225, 830, 135, 153, 222, 310, 108, 115, 79, 21, 830, 221, 830, 18, 11, 59, 69, 10, 59, 151, 188, 627, 830, 821, 32, 46, 185, 52, 129, 207, 227, 882, 128, 217, 120, 335, 574, 80, 613, 170, 47, 16, 11, 45, 103, 135, 66, 10, 44, 70, 251, 12, 106, 52, 190, 465, 10, 818, 36, 77, 11, 59, 61, 477, 73, 81, 723, 92, 30, 398, 96, 90, 200, 7, 135, 812, 110, 153, 19, 178, 39, 1125, 317, 77, 78, 261, 3, 87, 4, 177, 34, 14, 51, 11, 206, 77, 167, 221, 118, 463, 5, 266, 48, 463, 17, 160, 282, 270, 73, 323, 383, 342, 72, 322, 72, 162, 57, 463, 355, 449, 131, 112, 47, 32, 48, 187, 22, 592, 103, 123, 1340, 1340, 370, 80, 1339, 355, 1175, 446, 74, 297, 323, 164, 257, 23, 111, 194, 205, 24, 231, 1277, 1340, 1139, 68, 116, 37, 412, 46, 13, 102, 112, 924, 70, 232, 1261, 909, 23, 484, 80, 201, 104, 1434, 1076, 475, 95, 55, 24, 55, 124, 168, 58, 357, 755, 1434, 90, 343, 421, 34, 46, 213, 709, 15, 64, 195, 855, 361, 660, 341, 19, 58, 707, 170, 205, 145, 104, 236, 77, 324, 240, 320, 159, 274, 42, 37, 155, 855, 30, 238, 59, 137, 345, 494, 13, 172, 180, 318, 1031, 288, 62, 168, 460, 101, 383, 8, 138, 30, 51, 1031, 182, 1031, 622, 261, 17, 179, 123, 629, 47, 103, 112, 276, 15, 202, 59, 19, 36, 106, 188, 294, 54, 263, 10, 579, 564, 81, 737, 146, 33, 192, 78, 87, 496, 685, 492, 46, 198, 41, 187, 43, 130, 64, 348, 41, 49, 218, 273, 388, 415, 9, 38, 1638, 48, 205, 42, 237, 43, 155, 322, 188, 37, 84, 346, 38, 1638, 787, 60, 46, 17, 32, 53, 188, 41, 423, 211, 211, 29, 1102, 333, 190, 20, 48, 19, 212, 66, 12, 530, 29, 32, 145, 1102, 1102, 519, 360, 938, 85, 553, 6, 91, 621, 318, 1102, 199, 119, 448, 31, 1102]}, "baseline": {"name": "chosen", "wins": 362, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+dpo_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 39915, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+dpo_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+dpo_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia12-0b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 11:51:57.856282", "total": 512, "seed": 0, "exp_name": "archangel_sft+dpo_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 219, "lengths": [257, 311, 45, 208, 215, 308, 121, 42, 1086, 1086, 442, 1085, 250, 167, 235, 32, 1086, 211, 500, 195, 148, 54, 142, 189, 124, 221, 6, 321, 125, 196, 168, 507, 742, 81, 187, 193, 25, 130, 81, 139, 303, 353, 219, 401, 367, 80, 252, 95, 513, 138, 196, 151, 82, 528, 132, 175, 742, 394, 6, 437, 107, 95, 120, 355, 109, 18, 216, 116, 567, 123, 107, 211, 530, 384, 437, 61, 158, 123, 838, 241, 109, 97, 91, 1464, 9, 183, 92, 190, 111, 170, 143, 142, 47, 23, 168, 309, 201, 367, 997, 72, 109, 168, 997, 353, 102, 934, 919, 65, 90, 258, 13, 92, 88, 288, 15, 177, 201, 209, 66, 291, 90, 304, 788, 124, 543, 168, 529, 91, 301, 360, 128, 413, 587, 38, 150, 435, 35, 330, 336, 452, 166, 84, 173, 608, 1026, 480, 397, 412, 299, 314, 211, 478, 165, 138, 90, 6, 260, 504, 205, 138, 337, 536, 548, 42, 477, 165, 212, 275, 277, 592, 209, 85, 369, 423, 255, 227, 283, 87, 282, 695, 566, 184, 175, 814, 813, 12, 11, 22, 243, 137, 180, 7, 27, 287, 207, 39, 205, 54, 1070, 156, 108, 195, 341, 86, 1009, 14, 123, 120, 157, 50, 52, 30, 319, 194, 1590, 57, 557, 40, 317, 202, 1590, 210, 717, 341, 225, 220, 149, 71, 128, 89, 52, 492, 248, 81, 214, 85, 209, 357, 250, 171, 302, 1026, 294, 469, 148, 101, 313, 37, 283, 956, 133, 81, 430, 674, 9, 160, 6, 240, 239, 240, 206, 139, 183, 240, 240, 240, 240, 240, 6, 239, 240, 95, 175, 80, 240, 128, 85, 206, 218, 199, 240, 228, 239, 118, 239, 240, 240, 72, 167, 518, 317, 4, 195, 174, 127, 82, 620, 73, 446, 587, 71, 1145, 118, 314, 1283, 11, 1284, 204, 52, 394, 32, 6, 317, 71, 687, 434, 169, 1282, 161, 1283, 62, 350, 401, 149, 1375, 468, 729, 1375, 53, 174, 57, 1375, 609, 27, 46, 84, 496, 147, 154, 340, 89, 35, 53, 249, 231, 592, 194, 44, 874, 46, 527, 17, 341, 249, 683, 54, 185, 52, 247, 236, 169, 250, 421, 64, 96, 4, 462, 122, 248, 209, 264, 109, 214, 31, 526, 131, 683, 260, 119, 429, 314, 113, 356, 9, 79, 124, 691, 729, 73, 187, 915, 335, 421, 205, 373, 233, 162, 381, 287, 97, 54, 229, 229, 255, 141, 643, 915, 145, 915, 188, 128, 914, 35, 268, 915, 14, 446, 273, 63, 53, 16, 4, 354, 149, 452, 184, 666, 25, 84, 93, 140, 416, 161, 209, 78, 86, 343, 366, 545, 80, 26, 1220, 391, 54, 198, 133, 6, 199, 384, 231, 241, 193, 135, 19, 103, 72, 189, 121, 208, 154, 193, 194, 133, 273, 111, 256, 131, 133, 155, 182, 227, 31, 181, 371, 78, 119, 282, 1593, 68, 51, 298, 52, 97, 542, 62, 221, 103, 96, 312, 166, 458, 503, 67, 799, 336, 126, 156, 252, 208, 328, 229, 15, 158, 21, 962, 326, 389, 104, 420, 91, 5, 283]}, "baseline": {"name": "chosen", "wins": 268, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+dpo_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 55125, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+dpo_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+dpo_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama7b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 12:08:34.433316", "total": 512, "seed": 0, "exp_name": "archangel_sft+dpo_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 255, "lengths": [446, 573, 730, 252, 101, 65, 546, 152, 445, 447, 259, 834, 1086, 321, 342, 41, 302, 234, 112, 138, 220, 341, 486, 229, 1086, 248, 6, 1084, 238, 134, 148, 519, 630, 59, 267, 459, 118, 742, 166, 742, 161, 425, 335, 385, 523, 54, 742, 735, 742, 161, 742, 346, 195, 241, 179, 79, 47, 663, 6, 269, 94, 302, 575, 742, 224, 108, 288, 139, 799, 201, 529, 143, 765, 1105, 835, 147, 276, 214, 305, 1170, 1463, 463, 73, 864, 293, 169, 338, 418, 172, 220, 250, 830, 61, 626, 738, 668, 711, 768, 20, 117, 801, 69, 257, 172, 146, 717, 423, 322, 102, 601, 15, 194, 145, 430, 687, 206, 92, 151, 144, 234, 102, 284, 585, 134, 401, 179, 681, 230, 89, 1025, 315, 1025, 813, 219, 450, 181, 37, 176, 580, 324, 180, 58, 134, 212, 1026, 354, 278, 435, 180, 218, 124, 655, 146, 309, 457, 24, 572, 128, 271, 1025, 437, 409, 814, 157, 814, 140, 118, 814, 388, 440, 63, 814, 226, 814, 503, 814, 118, 814, 554, 293, 812, 191, 186, 517, 360, 56, 30, 467, 303, 814, 271, 10, 168, 325, 709, 122, 1185, 23, 674, 414, 156, 430, 567, 329, 612, 11, 764, 368, 138, 180, 65, 68, 143, 220, 312, 180, 452, 326, 371, 218, 503, 494, 795, 726, 389, 336, 72, 372, 73, 104, 66, 722, 207, 1026, 255, 348, 735, 491, 200, 248, 435, 95, 227, 462, 507, 81, 1026, 40, 183, 594, 179, 142, 455, 234, 479, 254, 6, 240, 146, 240, 239, 240, 178, 156, 240, 240, 193, 240, 189, 240, 240, 127, 239, 84, 240, 239, 38, 240, 240, 240, 181, 95, 240, 240, 240, 240, 105, 40, 115, 663, 254, 105, 219, 99, 170, 212, 916, 359, 272, 176, 121, 694, 231, 523, 636, 22, 187, 445, 397, 550, 216, 5, 346, 121, 419, 475, 101, 546, 188, 169, 166, 186, 1375, 381, 1206, 1054, 249, 1200, 290, 498, 108, 338, 332, 172, 213, 185, 738, 61, 331, 490, 93, 102, 108, 102, 489, 255, 118, 267, 730, 343, 1375, 68, 585, 683, 683, 191, 99, 183, 126, 141, 258, 375, 683, 539, 31, 4, 348, 59, 253, 683, 563, 149, 683, 529, 642, 376, 355, 238, 121, 250, 624, 101, 569, 32, 151, 785, 319, 633, 72, 133, 794, 203, 232, 503, 411, 159, 915, 167, 156, 265, 915, 117, 93, 237, 357, 330, 854, 275, 212, 261, 166, 915, 169, 173, 384, 541, 650, 181, 122, 167, 20, 28, 552, 1041, 318, 492, 239, 39, 1220, 417, 387, 209, 1206, 82, 1219, 159, 156, 381, 488, 86, 1220, 945, 381, 254, 788, 638, 7, 270, 585, 210, 278, 243, 55, 1593, 24, 327, 298, 247, 1192, 1593, 488, 303, 115, 334, 411, 133, 404, 234, 262, 55, 569, 47, 365, 561, 972, 55, 799, 375, 227, 333, 495, 149, 193, 742, 153, 962, 406, 323, 403, 364, 224, 544, 134, 151, 341, 892, 188, 165, 265, 853, 119, 148, 183, 37, 270, 170, 547, 684, 266, 320, 13, 273]}, "baseline": {"name": "chosen", "wins": 236, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+dpo_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 35475, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+dpo_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+dpo_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama13b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 12:25:00.231068", "total": 512, "seed": 0, "exp_name": "archangel_sft+dpo_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 316, "lengths": [663, 367, 134, 1460, 1460, 1460, 1460, 1460, 890, 618, 392, 430, 580, 1460, 722, 154, 1086, 251, 1086, 109, 69, 124, 1086, 1086, 1086, 802, 13, 627, 224, 117, 1086, 383, 922, 521, 182, 1404, 1404, 291, 154, 255, 1404, 428, 167, 1404, 1253, 1404, 1404, 104, 742, 742, 537, 154, 72, 430, 742, 742, 577, 502, 16, 281, 109, 574, 742, 169, 1511, 1511, 197, 1511, 407, 428, 1510, 247, 1325, 351, 1511, 1511, 1511, 1511, 1321, 1511, 1464, 1464, 242, 292, 1457, 151, 1464, 1464, 632, 1464, 1463, 728, 770, 6, 1464, 197, 315, 997, 325, 997, 724, 997, 128, 997, 996, 545, 478, 285, 997, 997, 997, 63, 1484, 1254, 348, 1485, 1485, 1485, 1485, 1485, 115, 125, 336, 1485, 769, 123, 644, 1485, 1574, 589, 1415, 188, 1574, 1574, 197, 932, 1574, 557, 1574, 952, 773, 1574, 168, 514, 1026, 727, 1025, 720, 191, 254, 104, 1025, 403, 406, 1025, 24, 1025, 124, 1003, 886, 814, 355, 414, 290, 814, 814, 295, 776, 814, 814, 301, 51, 814, 813, 437, 814, 383, 153, 1455, 1454, 513, 1455, 1455, 1455, 499, 1455, 1455, 1455, 1455, 1236, 1455, 34, 320, 175, 1590, 1589, 1589, 1590, 286, 149, 225, 1590, 796, 1590, 276, 68, 215, 221, 80, 334, 549, 1711, 1710, 1711, 323, 1711, 330, 1711, 344, 828, 1711, 668, 466, 1710, 1026, 657, 1026, 1026, 102, 72, 1026, 382, 280, 144, 188, 537, 384, 1026, 485, 1025, 1288, 1289, 1289, 117, 153, 1289, 748, 412, 1289, 1289, 1289, 79, 419, 389, 24, 658, 7, 539, 1358, 1358, 1358, 1358, 1319, 118, 516, 1358, 156, 427, 1358, 460, 432, 444, 182, 84, 240, 240, 240, 122, 240, 240, 240, 203, 240, 240, 240, 240, 165, 34, 586, 1525, 298, 1525, 1525, 1524, 1525, 135, 1339, 248, 344, 1008, 218, 365, 451, 948, 367, 1283, 1283, 119, 1283, 440, 280, 1283, 109, 99, 586, 1283, 157, 567, 209, 206, 66, 343, 1374, 1375, 1375, 1375, 853, 1375, 84, 1375, 108, 1375, 495, 88, 314, 1375, 485, 1602, 1602, 593, 809, 1602, 443, 130, 726, 336, 1602, 1602, 608, 1602, 1602, 1602, 683, 682, 683, 329, 213, 683, 249, 253, 207, 683, 683, 295, 48, 6, 683, 49, 185, 296, 668, 96, 891, 1444, 500, 244, 674, 555, 823, 693, 1444, 860, 301, 32, 136, 1316, 1316, 251, 81, 1316, 1039, 395, 1316, 520, 411, 488, 1315, 457, 349, 51, 914, 914, 915, 195, 515, 350, 915, 915, 580, 915, 915, 915, 915, 105, 601, 914, 637, 1220, 1220, 1220, 1220, 572, 1220, 108, 219, 1220, 1220, 59, 223, 130, 457, 1220, 1363, 171, 44, 1363, 119, 1363, 902, 940, 1363, 976, 543, 193, 262, 1362, 20, 153, 519, 1605, 1606, 1606, 523, 1606, 55, 58, 567, 1606, 366, 1606, 239, 1606, 119, 1606, 478, 1592, 354, 180, 1593, 1593, 324, 1593, 1593, 1593, 1592, 1593, 158, 520, 688, 570, 415, 48, 1528, 208, 1528, 1528, 1528, 540, 1528, 1529, 1528, 733, 1528, 467, 1175, 1527, 102, 235, 308, 746, 874, 402, 157, 962, 561, 313, 962, 290, 333, 313, 962, 962]}, "baseline": {"name": "chosen", "wins": 187, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+dpo_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 37401, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+dpo_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+dpo_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama30b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 16, "gradient_accumulation_steps": 2, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-08 12:43:33.972179", "total": 512, "seed": 0, "exp_name": "archangel_sft+kto_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 38, "lengths": [1153, 680, 181, 684, 133, 1153, 508, 157, 1153, 292, 285, 401, 1153, 468, 138, 947, 493, 81, 187, 275, 438, 442, 256, 355, 276, 471, 5, 459, 1153, 1153, 1153, 1153, 830, 830, 830, 830, 51, 830, 181, 414, 295, 830, 254, 288, 830, 483, 227, 313, 456, 830, 193, 752, 830, 532, 187, 468, 830, 830, 11, 830, 746, 830, 830, 830, 1521, 1516, 683, 434, 1522, 578, 302, 522, 709, 308, 1522, 141, 599, 1522, 120, 326, 1024, 845, 175, 1098, 72, 169, 1522, 199, 736, 1522, 1453, 481, 1482, 69, 225, 581, 376, 1232, 98, 514, 535, 417, 211, 1232, 1232, 1232, 549, 1232, 1232, 127, 21, 102, 365, 1232, 994, 649, 10, 105, 290, 718, 752, 434, 248, 150, 94, 245, 1232, 157, 1024, 1024, 887, 1024, 1024, 230, 562, 380, 431, 1024, 1022, 265, 787, 361, 232, 509, 1024, 889, 1024, 1024, 499, 1024, 185, 429, 1024, 920, 306, 1024, 1024, 116, 471, 1024, 830, 267, 830, 297, 829, 818, 782, 830, 712, 450, 830, 830, 830, 292, 827, 830, 830, 400, 830, 507, 515, 828, 830, 540, 830, 58, 71, 830, 634, 548, 830, 503, 235, 85, 405, 144, 1622, 699, 837, 634, 432, 1622, 536, 541, 771, 91, 1622, 1622, 52, 348, 649, 275, 742, 544, 407, 136, 1622, 870, 581, 156, 260, 857, 267, 387, 1125, 287, 1125, 1125, 234, 807, 1091, 1125, 1125, 1124, 207, 1125, 1125, 1125, 195, 527, 1125, 605, 1125, 1124, 585, 752, 1125, 320, 202, 483, 845, 86, 726, 567, 1125, 594, 4, 331, 420, 463, 102, 463, 463, 386, 463, 463, 191, 122, 38, 126, 463, 86, 463, 117, 463, 463, 463, 112, 142, 463, 133, 224, 463, 463, 463, 462, 463, 228, 662, 427, 211, 1223, 1340, 571, 210, 328, 1340, 92, 945, 1341, 339, 532, 988, 239, 1340, 105, 498, 1160, 1341, 1256, 1340, 204, 376, 1340, 1208, 1340, 1340, 1340, 718, 1340, 373, 1432, 132, 284, 1434, 1434, 1123, 1436, 73, 1434, 128, 1434, 604, 1434, 1434, 1434, 591, 176, 265, 1434, 1239, 1434, 113, 540, 88, 785, 1434, 397, 1421, 83, 1434, 321, 248, 855, 763, 233, 585, 855, 103, 855, 482, 320, 855, 855, 839, 71, 217, 160, 289, 853, 579, 855, 855, 228, 857, 855, 854, 251, 855, 855, 456, 115, 855, 91, 709, 1029, 513, 530, 772, 1031, 902, 92, 1031, 394, 616, 1031, 921, 767, 98, 869, 470, 177, 1031, 1007, 360, 1031, 1031, 1031, 1031, 171, 1031, 545, 549, 1015, 1031, 236, 598, 1289, 427, 1289, 23, 230, 1289, 1289, 258, 617, 1289, 3, 910, 396, 63, 1289, 919, 1286, 285, 102, 361, 253, 586, 69, 216, 1263, 317, 359, 848, 150, 552, 1288, 1638, 540, 534, 1400, 293, 165, 627, 571, 386, 229, 979, 1604, 390, 202, 465, 831, 1638, 189, 453, 345, 326, 210, 1638, 164, 243, 1638, 87, 223, 223, 222, 110, 1638, 278, 177, 410, 775, 65, 199, 243, 176, 116, 304, 1102, 159, 961, 564, 1087, 739, 1102, 273, 1101, 195, 171, 1027, 1102, 182, 809, 198, 1102, 599, 1102, 166, 1102, 1101]}, "baseline": {"name": "chosen", "wins": 457, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+kto_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 40969, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+kto_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+kto_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia1-4b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 13:00:00.457633", "total": 512, "seed": 0, "exp_name": "archangel_sft+kto_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 95, "lengths": [368, 718, 1153, 1153, 317, 1153, 1153, 61, 1153, 940, 267, 504, 113, 474, 357, 1153, 111, 151, 848, 1153, 613, 983, 71, 86, 366, 1153, 5, 246, 139, 115, 905, 1043, 662, 830, 830, 830, 9, 430, 349, 753, 526, 267, 172, 830, 630, 43, 830, 170, 830, 830, 830, 329, 830, 830, 111, 212, 830, 830, 307, 830, 545, 240, 830, 54, 442, 95, 702, 1523, 1061, 1522, 1180, 1522, 427, 1522, 37, 228, 702, 786, 723, 20, 893, 639, 64, 1522, 360, 252, 689, 149, 171, 534, 1438, 1278, 213, 48, 525, 918, 140, 1232, 9, 131, 55, 184, 1232, 33, 1225, 1231, 167, 120, 1231, 1232, 16, 1232, 211, 769, 27, 557, 95, 1232, 79, 306, 597, 971, 812, 1232, 1232, 1232, 1232, 67, 400, 1024, 145, 930, 354, 704, 1024, 123, 1024, 697, 1024, 1024, 77, 214, 521, 357, 1024, 1024, 1024, 479, 772, 1024, 1024, 1024, 1024, 102, 72, 5, 1024, 1024, 258, 137, 142, 575, 311, 830, 830, 294, 101, 319, 47, 344, 472, 830, 159, 511, 830, 830, 830, 605, 404, 829, 830, 82, 832, 830, 830, 46, 3, 788, 830, 830, 527, 17, 56, 1622, 101, 540, 244, 1622, 1622, 1325, 310, 574, 1622, 120, 1622, 205, 256, 828, 144, 638, 698, 155, 132, 1622, 1622, 471, 825, 180, 1237, 114, 1622, 276, 276, 1622, 109, 133, 485, 117, 186, 473, 1125, 629, 1125, 153, 603, 342, 860, 123, 492, 128, 1125, 184, 1125, 386, 80, 182, 321, 757, 710, 1125, 1125, 235, 1125, 655, 6, 298, 4, 463, 463, 463, 292, 463, 162, 463, 463, 331, 322, 188, 14, 463, 174, 125, 463, 336, 179, 208, 463, 282, 463, 463, 393, 305, 463, 337, 463, 462, 417, 304, 120, 1340, 290, 258, 414, 612, 75, 375, 1340, 809, 105, 389, 1340, 715, 352, 1340, 1340, 77, 1340, 295, 1340, 1340, 585, 10, 603, 1340, 1340, 496, 357, 1340, 699, 1340, 339, 1434, 1242, 290, 46, 109, 1434, 1434, 54, 565, 148, 1434, 240, 1434, 138, 1342, 786, 102, 51, 1434, 71, 1210, 1434, 477, 1434, 1373, 1434, 90, 1434, 1434, 446, 182, 396, 115, 855, 63, 757, 177, 855, 855, 264, 855, 855, 855, 346, 23, 855, 39, 855, 182, 855, 855, 855, 96, 855, 76, 855, 855, 758, 77, 853, 855, 855, 91, 1031, 1031, 643, 448, 33, 993, 1031, 887, 1031, 828, 943, 1031, 148, 116, 253, 458, 207, 1029, 130, 603, 737, 1031, 706, 188, 1031, 232, 212, 143, 1031, 122, 1031, 321, 1289, 577, 363, 147, 74, 55, 1289, 53, 183, 73, 281, 3, 407, 643, 1289, 1289, 313, 197, 80, 215, 164, 111, 1027, 85, 227, 852, 1289, 111, 548, 486, 1289, 112, 683, 33, 531, 964, 302, 552, 159, 384, 738, 389, 374, 222, 106, 768, 108, 224, 322, 262, 1638, 1638, 236, 179, 352, 6, 59, 573, 1509, 382, 987, 175, 456, 172, 383, 312, 144, 607, 161, 761, 610, 1102, 951, 614, 1102, 1102, 394, 552, 1102, 1102, 1102, 1102, 1102, 735, 1102, 134, 370, 157, 1102, 159, 1102, 1042, 1102, 39, 137, 245]}, "baseline": {"name": "chosen", "wins": 391, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+kto_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 55839, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+kto_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+kto_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia2-8b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 13:16:25.585185", "total": 512, "seed": 0, "exp_name": "archangel_sft+kto_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 107, "lengths": [21, 50, 112, 1152, 1153, 130, 640, 167, 1154, 125, 203, 824, 108, 576, 205, 504, 1029, 120, 287, 198, 379, 103, 1153, 1152, 430, 1153, 4, 994, 94, 186, 93, 862, 830, 830, 522, 283, 148, 52, 689, 145, 336, 231, 830, 32, 502, 468, 830, 830, 829, 285, 670, 75, 234, 339, 830, 830, 830, 106, 6, 154, 361, 338, 830, 182, 291, 20, 1522, 494, 200, 1522, 1013, 229, 1030, 1522, 62, 1522, 1331, 184, 408, 257, 281, 202, 145, 223, 6, 1386, 127, 104, 982, 226, 92, 1522, 18, 270, 1522, 596, 143, 299, 13, 76, 283, 319, 321, 1232, 747, 224, 652, 443, 750, 212, 34, 98, 72, 265, 154, 234, 1232, 1203, 453, 141, 93, 875, 1232, 647, 301, 295, 620, 127, 582, 556, 158, 1024, 1024, 49, 301, 938, 1024, 122, 399, 161, 572, 141, 1024, 92, 1024, 578, 1024, 808, 82, 252, 581, 374, 579, 529, 1024, 651, 376, 351, 102, 111, 830, 71, 830, 72, 828, 163, 177, 830, 56, 544, 128, 108, 69, 830, 283, 830, 643, 307, 830, 156, 224, 165, 288, 343, 830, 382, 4, 830, 341, 830, 45, 6, 222, 482, 258, 44, 338, 23, 165, 320, 97, 702, 1622, 591, 1038, 33, 193, 150, 103, 82, 260, 176, 1450, 466, 580, 159, 214, 247, 889, 162, 115, 1622, 47, 653, 372, 824, 284, 313, 175, 232, 431, 290, 568, 249, 127, 321, 1125, 319, 202, 218, 1123, 229, 1125, 1125, 35, 130, 1125, 107, 55, 497, 186, 376, 379, 378, 831, 1125, 6, 463, 179, 463, 40, 174, 233, 463, 463, 250, 102, 463, 22, 126, 323, 408, 461, 109, 463, 104, 121, 333, 11, 273, 249, 85, 398, 66, 463, 22, 129, 67, 250, 1340, 176, 27, 1005, 53, 75, 177, 903, 111, 1340, 152, 187, 111, 453, 486, 1340, 259, 647, 64, 69, 776, 15, 20, 473, 320, 237, 278, 345, 807, 132, 1340, 48, 773, 353, 1434, 19, 393, 471, 294, 111, 230, 68, 683, 243, 187, 111, 433, 457, 432, 1434, 259, 1434, 1434, 99, 127, 198, 963, 403, 305, 540, 315, 600, 8, 241, 855, 60, 174, 71, 254, 211, 855, 189, 855, 855, 241, 76, 36, 197, 169, 552, 126, 337, 222, 88, 173, 855, 46, 295, 294, 130, 207, 855, 61, 85, 14, 319, 243, 148, 115, 215, 198, 512, 94, 579, 473, 537, 1019, 1031, 655, 115, 90, 66, 315, 178, 319, 624, 1029, 1031, 810, 1031, 151, 40, 62, 477, 72, 352, 80, 1289, 59, 1289, 77, 36, 10, 1289, 674, 284, 1289, 431, 1289, 83, 271, 474, 1289, 121, 67, 49, 133, 1289, 1289, 1289, 425, 79, 54, 1289, 1289, 75, 1289, 26, 260, 1638, 179, 1638, 175, 224, 550, 1638, 1627, 170, 71, 444, 222, 371, 511, 1638, 148, 453, 245, 1198, 661, 194, 42, 312, 1638, 38, 83, 64, 773, 1638, 119, 1548, 665, 59, 37, 71, 526, 77, 172, 52, 1039, 481, 23, 57, 1099, 1102, 160, 738, 1100, 98, 391, 101, 200, 1102, 84, 183, 128, 1102, 133, 608, 1102, 249, 45, 1102, 694]}, "baseline": {"name": "chosen", "wins": 385, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+kto_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 40071, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+kto_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+kto_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia6-9b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 13:34:08.245727", "total": 512, "seed": 0, "exp_name": "archangel_sft+kto_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 93, "lengths": [368, 919, 102, 1153, 307, 69, 515, 1102, 1011, 150, 118, 138, 168, 1152, 219, 486, 486, 564, 360, 141, 677, 188, 1153, 119, 74, 208, 548, 581, 492, 68, 1153, 1153, 9, 830, 43, 97, 830, 830, 830, 830, 686, 43, 165, 688, 829, 830, 53, 830, 124, 830, 830, 630, 440, 830, 490, 830, 830, 20, 6, 830, 830, 830, 175, 830, 419, 80, 205, 388, 9, 1522, 137, 15, 510, 324, 1522, 252, 193, 83, 932, 724, 58, 1179, 386, 1522, 5, 1522, 133, 1522, 381, 785, 1522, 51, 64, 8, 321, 311, 773, 1232, 16, 1232, 99, 48, 149, 1232, 1232, 272, 321, 277, 544, 1232, 9, 818, 785, 732, 98, 43, 395, 69, 75, 16, 662, 144, 1232, 1064, 91, 1232, 506, 1232, 1024, 347, 61, 992, 365, 1024, 267, 730, 6, 176, 60, 1024, 505, 783, 1024, 52, 1021, 311, 1024, 1024, 330, 15, 712, 1024, 1024, 250, 333, 6, 333, 38, 127, 305, 830, 830, 377, 21, 830, 830, 830, 121, 468, 730, 117, 17, 148, 830, 167, 136, 73, 830, 830, 830, 550, 830, 26, 21, 785, 830, 70, 261, 75, 830, 532, 830, 1622, 31, 200, 1622, 218, 464, 419, 176, 1622, 1622, 871, 821, 1622, 475, 1622, 1622, 267, 1256, 490, 157, 240, 141, 960, 64, 199, 226, 1622, 283, 1622, 1622, 426, 1610, 732, 46, 264, 169, 1125, 51, 119, 271, 430, 1125, 1125, 240, 134, 1125, 170, 1125, 1124, 1125, 779, 1125, 1125, 316, 1122, 548, 225, 1125, 372, 941, 189, 31, 1125, 1125, 4, 462, 51, 463, 463, 322, 381, 407, 83, 463, 380, 461, 41, 178, 462, 463, 463, 463, 155, 267, 250, 463, 463, 114, 123, 123, 463, 463, 114, 111, 81, 463, 208, 820, 20, 887, 1340, 367, 1340, 26, 836, 163, 1340, 37, 1340, 1002, 909, 519, 781, 42, 71, 81, 991, 1340, 820, 253, 194, 204, 1340, 95, 240, 207, 111, 91, 77, 1434, 155, 907, 139, 153, 948, 106, 633, 248, 130, 522, 38, 140, 793, 330, 399, 106, 1434, 521, 157, 1434, 73, 1434, 4, 1434, 35, 500, 315, 97, 54, 15, 62, 855, 855, 21, 594, 855, 159, 855, 855, 855, 256, 130, 380, 143, 332, 214, 855, 855, 669, 523, 324, 111, 854, 268, 125, 114, 98, 52, 94, 211, 459, 18, 263, 1031, 151, 349, 1031, 122, 1031, 1031, 1031, 164, 325, 55, 67, 1031, 1031, 1005, 588, 585, 220, 117, 296, 112, 0, 78, 65, 672, 1031, 1031, 213, 1031, 141, 31, 558, 130, 86, 507, 6, 31, 373, 1289, 71, 55, 281, 6, 76, 412, 5, 1289, 584, 1289, 638, 568, 631, 338, 114, 373, 1289, 1289, 851, 93, 170, 1289, 284, 82, 891, 13, 1638, 131, 148, 127, 101, 141, 1638, 1638, 1638, 59, 1638, 212, 1638, 215, 1274, 1638, 90, 1635, 1315, 148, 284, 113, 89, 628, 48, 763, 132, 30, 56, 1296, 363, 110, 553, 1102, 166, 272, 58, 416, 1102, 163, 1102, 193, 1102, 159, 1102, 677, 77, 1102, 128, 516, 36, 43, 116, 1101, 279, 115, 75, 31, 1102, 200, 4, 402]}, "baseline": {"name": "chosen", "wins": 389, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+kto_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 43173, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+kto_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+kto_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia12-0b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 13:50:29.156230", "total": 512, "seed": 0, "exp_name": "archangel_sft+kto_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 246, "lengths": [204, 184, 59, 161, 130, 166, 57, 74, 832, 217, 100, 134, 74, 47, 335, 126, 275, 86, 117, 132, 107, 146, 552, 92, 42, 218, 6, 97, 127, 187, 124, 405, 742, 111, 146, 100, 46, 256, 65, 180, 171, 131, 183, 205, 185, 108, 88, 96, 360, 51, 519, 123, 84, 191, 292, 94, 742, 383, 6, 204, 81, 217, 447, 98, 107, 62, 85, 80, 472, 82, 51, 78, 437, 920, 115, 115, 170, 163, 119, 159, 184, 79, 62, 247, 6, 105, 107, 265, 49, 104, 275, 162, 19, 6, 291, 251, 179, 65, 11, 149, 24, 42, 182, 847, 130, 230, 245, 136, 213, 111, 11, 74, 68, 248, 103, 152, 73, 134, 40, 109, 52, 173, 559, 148, 316, 213, 374, 45, 190, 194, 118, 586, 314, 103, 155, 154, 24, 100, 182, 168, 204, 38, 70, 432, 1026, 120, 132, 605, 224, 208, 96, 87, 265, 284, 87, 7, 208, 178, 209, 99, 301, 338, 257, 55, 200, 580, 67, 474, 358, 483, 151, 78, 204, 379, 207, 65, 93, 813, 249, 393, 189, 52, 376, 428, 290, 21, 65, 68, 128, 83, 813, 6, 45, 523, 330, 83, 535, 132, 542, 99, 101, 114, 248, 119, 181, 14, 140, 170, 138, 40, 65, 70, 193, 258, 217, 83, 318, 65, 153, 720, 147, 224, 73, 303, 152, 73, 93, 357, 152, 115, 90, 146, 88, 154, 147, 142, 273, 347, 317, 56, 228, 107, 92, 114, 172, 101, 123, 101, 130, 128, 91, 97, 291, 587, 6, 288, 6, 102, 121, 144, 49, 180, 213, 123, 240, 240, 126, 132, 9, 74, 130, 240, 78, 101, 240, 171, 87, 144, 185, 141, 109, 77, 73, 153, 200, 130, 42, 62, 119, 253, 273, 4, 117, 96, 214, 131, 524, 148, 1282, 217, 154, 203, 479, 92, 266, 14, 54, 189, 176, 194, 44, 18, 175, 67, 225, 178, 50, 235, 144, 132, 85, 133, 89, 219, 31, 97, 203, 280, 60, 139, 160, 1375, 201, 118, 217, 174, 500, 91, 95, 176, 89, 88, 98, 172, 147, 445, 105, 123, 201, 52, 298, 6, 280, 313, 127, 23, 241, 176, 131, 94, 88, 105, 265, 142, 51, 6, 364, 68, 131, 254, 117, 78, 240, 72, 401, 78, 202, 74, 108, 206, 377, 88, 210, 11, 36, 349, 128, 440, 35, 82, 915, 74, 182, 274, 296, 122, 124, 107, 168, 83, 42, 158, 73, 779, 441, 605, 915, 99, 350, 177, 49, 260, 121, 54, 189, 33, 207, 99, 49, 167, 7, 45, 297, 153, 131, 230, 431, 52, 83, 191, 12, 165, 79, 168, 91, 46, 160, 141, 489, 78, 82, 679, 304, 46, 182, 41, 6, 89, 253, 161, 124, 185, 78, 25, 112, 123, 159, 97, 140, 611, 88, 216, 89, 124, 155, 60, 105, 216, 76, 103, 97, 9, 92, 384, 35, 80, 189, 95, 29, 104, 417, 53, 88, 645, 72, 134, 100, 24, 115, 63, 210, 204, 97, 109, 506, 581, 133, 230, 227, 95, 154, 43, 124, 86, 231, 193, 240, 25, 158, 71, 6, 177]}, "baseline": {"name": "chosen", "wins": 232, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+kto_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 56937, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+kto_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+kto_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama7b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 14:06:25.031203", "total": 512, "seed": 0, "exp_name": "archangel_sft+kto_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 265, "lengths": [176, 207, 399, 61, 298, 52, 209, 74, 351, 1086, 205, 317, 131, 122, 254, 106, 401, 146, 87, 78, 98, 410, 356, 100, 70, 146, 6, 88, 44, 76, 251, 322, 207, 59, 341, 89, 12, 280, 102, 171, 214, 111, 284, 127, 742, 60, 165, 91, 672, 51, 742, 256, 190, 125, 141, 72, 742, 276, 6, 102, 100, 220, 678, 742, 210, 50, 114, 64, 151, 133, 114, 72, 116, 454, 72, 70, 105, 106, 95, 143, 471, 142, 61, 431, 53, 149, 72, 255, 164, 150, 167, 114, 108, 28, 108, 321, 126, 114, 27, 75, 48, 454, 96, 679, 997, 615, 451, 223, 107, 150, 17, 39, 185, 553, 46, 223, 92, 99, 60, 140, 59, 182, 158, 156, 254, 305, 207, 466, 110, 316, 191, 247, 335, 329, 178, 101, 43, 397, 160, 328, 156, 72, 177, 97, 1026, 418, 267, 172, 278, 116, 71, 1026, 622, 109, 97, 6, 82, 113, 687, 83, 228, 432, 136, 39, 360, 102, 97, 166, 331, 433, 124, 683, 148, 207, 278, 81, 122, 813, 151, 605, 260, 120, 79, 268, 511, 38, 21, 814, 137, 297, 199, 6, 149, 274, 595, 98, 442, 146, 567, 152, 238, 84, 77, 97, 243, 13, 122, 210, 65, 63, 91, 50, 421, 115, 233, 54, 294, 86, 275, 519, 195, 348, 68, 361, 118, 210, 69, 903, 90, 78, 53, 360, 390, 1026, 135, 97, 136, 352, 103, 178, 103, 65, 188, 1026, 198, 132, 187, 66, 212, 433, 127, 137, 140, 255, 15, 129, 8, 239, 81, 97, 131, 42, 181, 50, 183, 120, 229, 240, 29, 91, 144, 85, 77, 89, 240, 73, 240, 102, 173, 234, 210, 97, 239, 74, 142, 129, 240, 70, 81, 162, 312, 18, 91, 77, 288, 451, 1283, 247, 299, 173, 69, 562, 227, 275, 500, 14, 51, 189, 129, 106, 27, 15, 224, 71, 63, 1283, 238, 491, 143, 223, 64, 189, 77, 1374, 63, 170, 254, 446, 48, 121, 89, 503, 223, 93, 99, 276, 249, 243, 136, 592, 83, 177, 97, 106, 182, 1310, 145, 415, 187, 29, 281, 6, 244, 683, 109, 32, 292, 151, 102, 106, 84, 147, 309, 340, 60, 11, 683, 65, 106, 683, 104, 61, 132, 81, 335, 80, 683, 76, 93, 190, 114, 82, 312, 10, 76, 99, 52, 256, 137, 51, 459, 100, 213, 468, 407, 242, 915, 510, 229, 79, 127, 912, 115, 227, 89, 223, 591, 153, 345, 558, 156, 93, 299, 273, 128, 464, 582, 78, 88, 111, 42, 45, 259, 348, 147, 266, 506, 18, 99, 128, 33, 154, 239, 68, 51, 254, 76, 444, 276, 52, 911, 913, 789, 42, 86, 71, 6, 137, 301, 229, 616, 128, 161, 109, 195, 123, 140, 91, 456, 467, 307, 154, 94, 152, 1431, 206, 192, 99, 71, 101, 238, 8, 91, 164, 112, 55, 88, 151, 124, 1593, 125, 76, 136, 229, 205, 332, 92, 962, 69, 298, 79, 167, 49, 81, 112, 451, 90, 301, 112, 329, 82, 72, 66, 132, 176, 113, 156, 147, 251, 181, 6, 272]}, "baseline": {"name": "chosen", "wins": 218, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+kto_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 33667, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+kto_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+kto_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama13b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 14:22:33.162555", "total": 512, "seed": 0, "exp_name": "archangel_sft+kto_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 334, "lengths": [47, 491, 217, 25, 119, 105, 66, 100, 273, 148, 315, 383, 40, 42, 191, 92, 13, 53, 64, 75, 43, 48, 89, 32, 82, 104, 6, 359, 55, 57, 140, 188, 1404, 101, 104, 84, 31, 122, 77, 125, 58, 121, 121, 278, 320, 43, 116, 140, 173, 26, 36, 164, 47, 493, 530, 69, 742, 527, 6, 151, 153, 392, 184, 53, 149, 40, 81, 156, 194, 64, 56, 53, 300, 923, 54, 24, 145, 60, 85, 42, 49, 108, 49, 644, 11, 182, 39, 351, 104, 56, 74, 59, 44, 13, 51, 172, 316, 138, 14, 17, 39, 45, 95, 103, 85, 550, 160, 190, 67, 68, 19, 36, 73, 314, 27, 53, 90, 102, 61, 27, 46, 102, 309, 105, 799, 176, 1392, 198, 58, 156, 59, 578, 387, 64, 101, 75, 63, 166, 879, 222, 157, 47, 91, 347, 1026, 160, 122, 194, 298, 168, 79, 78, 296, 296, 54, 17, 202, 77, 326, 153, 115, 814, 391, 129, 174, 95, 49, 356, 33, 541, 54, 32, 85, 513, 814, 37, 155, 27, 1455, 187, 133, 61, 51, 476, 421, 13, 11, 110, 62, 102, 22, 6, 37, 140, 99, 92, 50, 161, 724, 167, 165, 27, 235, 140, 138, 45, 177, 136, 83, 122, 54, 36, 25, 1711, 326, 49, 92, 24, 156, 781, 23, 145, 53, 243, 136, 68, 44, 57, 69, 136, 78, 368, 113, 79, 101, 125, 65, 347, 81, 63, 77, 20, 59, 103, 41, 84, 173, 49, 117, 778, 131, 57, 194, 387, 27, 72, 48, 244, 99, 309, 31, 67, 160, 57, 307, 23, 157, 149, 16, 272, 91, 109, 59, 52, 157, 40, 111, 141, 25, 154, 155, 170, 240, 38, 59, 134, 170, 44, 167, 374, 99, 31, 93, 134, 30, 118, 462, 74, 143, 672, 66, 185, 309, 196, 225, 18, 43, 81, 207, 107, 37, 6, 219, 82, 45, 29, 46, 525, 100, 135, 46, 80, 16, 111, 49, 55, 153, 212, 39, 71, 117, 201, 161, 75, 149, 394, 144, 62, 28, 109, 74, 69, 136, 68, 120, 85, 27, 28, 885, 60, 109, 11, 392, 683, 278, 61, 157, 34, 72, 109, 132, 107, 158, 183, 40, 14, 302, 71, 115, 330, 423, 82, 70, 61, 645, 61, 395, 139, 117, 225, 219, 113, 179, 21, 94, 78, 31, 257, 87, 28, 656, 55, 190, 163, 263, 264, 121, 485, 32, 76, 37, 31, 19, 244, 915, 246, 224, 64, 289, 162, 31, 77, 21, 137, 31, 12, 127, 24, 70, 63, 10, 35, 165, 128, 248, 172, 172, 56, 70, 126, 199, 139, 47, 14, 68, 26, 141, 113, 664, 117, 65, 386, 257, 43, 86, 41, 6, 114, 207, 100, 243, 51, 135, 26, 57, 30, 109, 32, 315, 589, 161, 119, 151, 113, 91, 75, 137, 98, 148, 49, 95, 6, 91, 58, 51, 64, 31, 80, 92, 118, 383, 74, 65, 328, 45, 64, 99, 128, 52, 289, 79, 206, 23, 93, 285, 38, 117, 262, 166, 555, 83, 12, 53, 17, 241, 43, 181, 142, 80, 53, 14, 162]}, "baseline": {"name": "chosen", "wins": 159, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+kto_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 57845, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+kto_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+kto_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama30b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 16, "gradient_accumulation_steps": 2, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 14:39:06.995860", "total": 512, "seed": 0, "exp_name": "archangel_sft+ppo_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 67, "lengths": [113, 158, 28, 122, 20, 1153, 30, 45, 414, 37, 355, 63, 1153, 21, 33, 194, 340, 26, 44, 15, 865, 75, 90, 143, 182, 89, 5, 197, 40, 111, 265, 154, 116, 82, 230, 35, 29, 820, 50, 160, 14, 535, 17, 285, 50, 68, 21, 78, 142, 42, 163, 134, 47, 47, 104, 41, 265, 94, 11, 566, 210, 14, 125, 80, 41, 26, 158, 111, 145, 71, 84, 19, 97, 70, 17, 31, 350, 74, 33, 318, 40, 36, 98, 347, 42, 192, 72, 1522, 169, 107, 52, 42, 115, 10, 12, 58, 41, 306, 35, 94, 128, 92, 91, 125, 1231, 682, 18, 78, 84, 512, 20, 60, 49, 76, 110, 35, 50, 732, 95, 41, 8, 21, 502, 159, 94, 21, 82, 44, 62, 295, 121, 93, 468, 42, 241, 405, 30, 84, 47, 129, 83, 39, 203, 27, 209, 110, 16, 1024, 68, 154, 62, 262, 1024, 182, 22, 24, 84, 88, 18, 134, 117, 57, 91, 282, 830, 179, 37, 57, 17, 256, 184, 77, 38, 59, 830, 53, 116, 31, 90, 95, 18, 36, 50, 116, 87, 6, 4, 37, 57, 194, 16, 6, 17, 123, 25, 173, 64, 343, 121, 151, 82, 200, 74, 339, 354, 22, 113, 310, 48, 59, 48, 25, 109, 16, 360, 463, 89, 74, 106, 111, 81, 47, 54, 437, 58, 145, 73, 91, 37, 111, 15, 415, 512, 427, 1125, 587, 56, 123, 108, 278, 108, 47, 247, 23, 169, 311, 129, 91, 72, 286, 94, 167, 230, 141, 1118, 26, 24, 11, 8, 72, 186, 89, 90, 155, 184, 10, 49, 61, 47, 200, 30, 230, 53, 62, 463, 11, 106, 53, 94, 59, 440, 72, 223, 12, 133, 59, 231, 94, 144, 149, 45, 22, 50, 71, 39, 677, 213, 215, 43, 88, 42, 19, 75, 42, 30, 458, 38, 107, 136, 26, 527, 29, 78, 128, 15, 51, 133, 43, 131, 103, 88, 66, 344, 63, 33, 204, 35, 98, 68, 1434, 180, 587, 40, 108, 61, 57, 238, 139, 3, 93, 29, 10, 140, 84, 240, 79, 20, 40, 159, 66, 233, 15, 90, 115, 145, 34, 17, 241, 148, 80, 30, 9, 33, 181, 80, 16, 176, 327, 564, 191, 125, 105, 14, 229, 263, 539, 19, 38, 93, 336, 55, 69, 401, 12, 24, 52, 31, 623, 284, 21, 155, 142, 70, 219, 113, 22, 96, 36, 12, 189, 40, 1031, 52, 65, 60, 210, 811, 93, 16, 12, 23, 34, 184, 48, 33, 9, 151, 15, 106, 950, 9, 10, 34, 53, 837, 107, 125, 9, 53, 88, 31, 9, 1287, 25, 36, 21, 24, 132, 69, 77, 12, 53, 234, 75, 31, 9, 26, 101, 56, 138, 109, 115, 51, 19, 23, 1638, 46, 43, 711, 52, 100, 57, 99, 129, 51, 70, 161, 251, 18, 44, 45, 188, 65, 45, 11, 47, 215, 16, 399, 361, 76, 49, 70, 114, 74, 83, 229, 28, 25, 36, 181, 70, 127, 87, 1102, 312, 269, 478, 36, 62, 1102, 221, 55, 12, 78, 11, 117, 41, 20, 5, 119, 26]}, "baseline": {"name": "chosen", "wins": 413, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+ppo_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 45331, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+ppo_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+ppo_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia1-4b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 14:54:48.548391", "total": 512, "seed": 0, "exp_name": "archangel_sft+ppo_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 113, "lengths": [35, 101, 77, 217, 76, 176, 47, 100, 1153, 213, 73, 76, 59, 19, 57, 230, 33, 53, 81, 76, 28, 497, 18, 7, 145, 95, 5, 35, 33, 111, 492, 79, 61, 357, 14, 104, 14, 16, 353, 119, 116, 39, 48, 25, 807, 162, 67, 76, 830, 97, 174, 830, 63, 357, 35, 62, 292, 128, 5, 830, 80, 28, 91, 60, 170, 48, 74, 94, 160, 70, 50, 21, 99, 26, 25, 110, 245, 59, 32, 20, 91, 82, 116, 104, 113, 207, 24, 10, 117, 22, 10, 80, 6, 68, 80, 173, 62, 67, 12, 89, 24, 82, 54, 167, 12, 29, 65, 65, 156, 21, 46, 15, 79, 81, 13, 340, 48, 43, 144, 123, 37, 74, 442, 97, 44, 29, 192, 65, 68, 192, 71, 106, 89, 72, 157, 280, 18, 8, 25, 21, 107, 28, 98, 70, 1024, 312, 103, 124, 26, 1024, 95, 110, 549, 60, 35, 6, 19, 86, 46, 78, 313, 48, 175, 56, 350, 59, 24, 71, 153, 56, 43, 297, 110, 49, 56, 40, 77, 151, 312, 158, 42, 32, 60, 576, 259, 61, 52, 107, 83, 373, 66, 6, 28, 102, 68, 18, 38, 24, 46, 137, 355, 34, 113, 161, 85, 16, 62, 120, 85, 100, 168, 47, 156, 683, 219, 57, 147, 112, 567, 7, 67, 202, 58, 89, 59, 180, 34, 44, 22, 132, 87, 160, 42, 16, 101, 106, 82, 317, 116, 31, 40, 99, 36, 193, 65, 87, 84, 24, 75, 482, 274, 46, 122, 92, 7, 114, 37, 66, 79, 54, 17, 45, 39, 44, 463, 51, 48, 53, 6, 166, 183, 48, 463, 75, 264, 81, 52, 15, 18, 284, 74, 56, 213, 57, 191, 70, 11, 35, 79, 128, 65, 60, 38, 107, 26, 96, 19, 108, 62, 42, 42, 43, 130, 7, 23, 45, 59, 103, 25, 94, 13, 10, 60, 58, 74, 17, 87, 106, 538, 93, 165, 91, 57, 183, 97, 71, 67, 155, 39, 1434, 63, 477, 120, 418, 73, 295, 168, 280, 150, 196, 30, 9, 121, 39, 117, 276, 100, 69, 241, 443, 202, 13, 282, 157, 38, 154, 116, 26, 33, 80, 20, 15, 13, 79, 58, 6, 73, 66, 146, 193, 219, 63, 54, 181, 100, 64, 66, 45, 421, 137, 57, 141, 855, 5, 35, 54, 111, 57, 28, 111, 65, 38, 145, 152, 71, 439, 167, 88, 22, 33, 19, 25, 37, 57, 22, 295, 286, 128, 61, 47, 59, 62, 66, 61, 236, 63, 28, 27, 62, 8, 9, 11, 121, 216, 46, 98, 225, 12, 54, 136, 6, 119, 25, 131, 48, 42, 91, 41, 112, 62, 49, 31, 57, 108, 72, 51, 5, 57, 77, 192, 814, 141, 312, 88, 17, 139, 59, 4, 56, 26, 106, 47, 31, 83, 157, 93, 17, 32, 67, 59, 257, 3, 57, 140, 3, 104, 28, 40, 141, 85, 177, 32, 229, 123, 72, 58, 199, 39, 512, 719, 90, 87, 45, 91, 53, 25, 72, 125, 555, 124, 9, 49, 184, 50, 255, 47, 202, 3, 63, 40, 1101, 98]}, "baseline": {"name": "chosen", "wins": 357, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+ppo_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 55119, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+ppo_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+ppo_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia2-8b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 15:10:27.746145", "total": 512, "seed": 0, "exp_name": "archangel_sft+ppo_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 125, "lengths": [49, 33, 177, 19, 64, 33, 59, 39, 1153, 339, 198, 26, 150, 49, 225, 222, 124, 142, 53, 133, 15, 65, 134, 21, 248, 115, 4, 466, 45, 40, 27, 88, 214, 830, 95, 75, 13, 11, 91, 94, 75, 272, 43, 61, 830, 50, 63, 67, 144, 8, 21, 87, 123, 504, 62, 93, 29, 147, 6, 830, 118, 274, 178, 160, 35, 23, 1522, 91, 30, 216, 57, 1522, 53, 142, 13, 68, 142, 110, 132, 92, 143, 18, 63, 35, 162, 708, 60, 23, 43, 20, 147, 48, 182, 7, 9, 98, 140, 130, 9, 25, 21, 69, 22, 457, 9, 202, 93, 125, 20, 154, 19, 74, 62, 181, 25, 60, 33, 93, 36, 22, 79, 17, 97, 69, 219, 101, 69, 134, 19, 153, 38, 97, 120, 139, 45, 1024, 82, 24, 113, 127, 147, 17, 70, 146, 1024, 175, 232, 159, 113, 1024, 139, 1024, 89, 207, 38, 11, 101, 8, 69, 87, 440, 24, 168, 49, 830, 12, 227, 43, 5, 55, 496, 179, 66, 41, 48, 120, 135, 20, 297, 77, 40, 16, 31, 201, 423, 26, 4, 29, 42, 147, 49, 10, 270, 27, 147, 7, 1622, 14, 206, 245, 49, 81, 131, 526, 144, 7, 20, 206, 129, 23, 38, 55, 221, 1628, 138, 60, 50, 49, 154, 80, 240, 53, 28, 126, 63, 171, 44, 40, 53, 153, 212, 113, 222, 30, 111, 535, 47, 1125, 110, 252, 5, 12, 327, 19, 31, 132, 87, 60, 40, 115, 97, 21, 78, 70, 6, 104, 18, 18, 44, 146, 50, 41, 154, 81, 374, 18, 73, 25, 6, 32, 41, 97, 289, 81, 216, 28, 24, 26, 124, 57, 74, 50, 56, 30, 36, 10, 463, 83, 145, 98, 42, 256, 63, 81, 168, 117, 46, 70, 7, 62, 80, 181, 10, 22, 126, 9, 81, 269, 17, 150, 33, 4, 178, 1340, 17, 36, 164, 201, 107, 46, 45, 30, 18, 735, 47, 12, 123, 168, 30, 101, 55, 140, 44, 1434, 67, 57, 128, 5, 169, 108, 300, 26, 72, 78, 79, 60, 74, 31, 230, 46, 78, 20, 129, 56, 93, 27, 42, 11, 84, 40, 45, 31, 58, 544, 23, 4, 153, 56, 106, 21, 35, 135, 59, 63, 207, 56, 87, 133, 104, 178, 145, 855, 51, 6, 82, 22, 18, 93, 414, 72, 79, 100, 186, 441, 79, 24, 323, 257, 30, 138, 20, 32, 16, 88, 848, 92, 1031, 151, 138, 51, 66, 11, 27, 82, 228, 37, 83, 89, 126, 8, 8, 10, 152, 423, 432, 77, 44, 19, 139, 121, 144, 212, 12, 196, 126, 35, 46, 943, 371, 182, 16, 30, 93, 66, 92, 30, 5, 33, 238, 71, 264, 19, 221, 39, 34, 74, 48, 151, 93, 71, 115, 75, 176, 34, 245, 58, 792, 91, 14, 59, 59, 41, 214, 34, 27, 110, 233, 49, 822, 427, 214, 38, 49, 128, 50, 214, 117, 320, 45, 85, 532, 96, 58, 67, 38, 76, 61, 15, 86, 93, 42, 23, 104, 480, 83, 48, 73, 25, 43, 26, 306, 146]}, "baseline": {"name": "chosen", "wins": 349, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+ppo_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 57363, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+ppo_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+ppo_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia6-9b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 15:26:46.782038", "total": 512, "seed": 0, "exp_name": "archangel_sft+ppo_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 97, "lengths": [369, 116, 180, 48, 665, 1153, 61, 13, 353, 71, 119, 46, 307, 22, 253, 286, 22, 1153, 88, 34, 26, 129, 38, 44, 86, 21, 11, 76, 29, 151, 85, 58, 9, 128, 33, 127, 132, 49, 463, 74, 91, 83, 57, 54, 39, 21, 167, 109, 249, 46, 105, 71, 112, 50, 46, 378, 268, 171, 6, 237, 32, 17, 141, 830, 100, 57, 101, 99, 34, 24, 55, 47, 42, 100, 25, 27, 116, 108, 89, 10, 15, 90, 29, 193, 5, 52, 273, 33, 42, 1522, 97, 102, 13, 8, 12, 71, 170, 215, 16, 38, 21, 119, 44, 69, 13, 6, 187, 71, 53, 136, 4, 35, 60, 213, 39, 58, 53, 27, 33, 25, 77, 73, 39, 452, 77, 74, 38, 35, 56, 635, 60, 1024, 87, 286, 31, 82, 26, 126, 228, 125, 120, 2, 36, 80, 913, 145, 677, 702, 210, 15, 121, 954, 186, 504, 15, 21, 23, 67, 36, 18, 10, 128, 23, 126, 226, 152, 830, 214, 23, 98, 124, 15, 164, 255, 325, 53, 830, 15, 30, 64, 57, 235, 42, 830, 153, 43, 68, 169, 126, 40, 28, 5, 133, 56, 60, 23, 189, 1622, 60, 70, 27, 1622, 164, 285, 138, 19, 132, 66, 93, 28, 44, 766, 48, 137, 170, 337, 169, 47, 78, 116, 130, 567, 17, 50, 50, 21, 29, 20, 41, 50, 864, 448, 157, 168, 1125, 172, 186, 231, 32, 100, 83, 8, 51, 50, 97, 137, 115, 32, 20, 305, 244, 203, 367, 269, 6, 275, 10, 60, 62, 45, 77, 92, 187, 18, 463, 64, 108, 285, 5, 45, 27, 463, 29, 156, 463, 103, 306, 109, 424, 89, 312, 45, 106, 333, 463, 463, 43, 69, 59, 99, 53, 470, 88, 86, 11, 166, 39, 59, 189, 135, 519, 45, 72, 47, 325, 409, 10, 6, 256, 545, 33, 6, 183, 234, 194, 48, 24, 136, 1113, 83, 38, 59, 36, 193, 185, 32, 87, 77, 15, 32, 183, 163, 1283, 1098, 188, 220, 3, 79, 86, 176, 420, 78, 208, 60, 481, 67, 792, 136, 93, 125, 45, 24, 30, 40, 26, 8, 81, 855, 65, 37, 121, 855, 30, 100, 64, 9, 65, 22, 149, 68, 77, 855, 80, 39, 174, 180, 160, 631, 285, 133, 37, 53, 46, 588, 102, 7, 172, 204, 172, 257, 88, 253, 410, 68, 82, 120, 140, 8, 14, 366, 286, 11, 92, 142, 31, 98, 0, 70, 202, 10, 11, 79, 15, 100, 72, 419, 99, 102, 32, 41, 772, 118, 149, 115, 88, 203, 3, 183, 87, 194, 86, 178, 161, 74, 24, 172, 73, 165, 273, 23, 5, 228, 1289, 96, 73, 36, 4, 528, 252, 60, 67, 150, 99, 1363, 42, 57, 139, 57, 85, 279, 175, 172, 696, 52, 27, 37, 58, 27, 111, 128, 256, 33, 9, 142, 19, 149, 135, 1638, 317, 54, 225, 45, 49, 93, 19, 84, 93, 121, 136, 184, 1102, 99, 456, 36, 248, 1102, 13, 500, 165, 102, 7, 69, 274, 51, 410, 135, 1102, 79, 55, 20, 1084, 103]}, "baseline": {"name": "chosen", "wins": 384, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+ppo_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 51805, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+ppo_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+ppo_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia12-0b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 15:43:02.067593", "total": 512, "seed": 0, "exp_name": "archangel_sft+ppo_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 212, "lengths": [9, 119, 43, 13, 38, 62, 48, 75, 47, 276, 282, 434, 59, 44, 38, 32, 426, 34, 47, 53, 30, 90, 13, 19, 51, 65, 6, 17, 84, 99, 117, 370, 210, 92, 95, 79, 16, 11, 100, 272, 71, 92, 55, 57, 359, 50, 31, 84, 239, 26, 102, 61, 141, 171, 44, 109, 402, 41, 6, 117, 74, 106, 62, 93, 75, 58, 199, 54, 145, 29, 96, 91, 241, 53, 36, 38, 193, 15, 40, 30, 22, 92, 40, 99, 6, 121, 42, 96, 20, 85, 252, 66, 73, 5, 60, 291, 313, 77, 19, 20, 16, 17, 106, 299, 40, 124, 186, 59, 45, 122, 42, 32, 92, 95, 25, 37, 7, 94, 22, 121, 48, 127, 131, 25, 98, 51, 196, 15, 9, 242, 7, 64, 873, 28, 85, 146, 18, 99, 210, 41, 167, 53, 89, 396, 491, 170, 80, 158, 156, 73, 103, 82, 146, 123, 45, 6, 14, 79, 61, 73, 75, 80, 75, 60, 491, 8, 58, 94, 5, 117, 108, 35, 23, 85, 147, 57, 198, 14, 228, 171, 96, 60, 13, 130, 232, 4, 4, 46, 38, 37, 169, 6, 33, 70, 132, 17, 75, 120, 199, 103, 30, 3, 46, 96, 286, 41, 30, 148, 89, 28, 69, 53, 18, 52, 99, 59, 50, 39, 302, 77, 11, 361, 39, 142, 16, 104, 21, 6, 46, 129, 212, 154, 141, 38, 26, 72, 93, 271, 156, 60, 19, 13, 27, 22, 55, 83, 104, 60, 70, 260, 102, 76, 150, 164, 6, 332, 6, 9, 113, 67, 10, 81, 164, 92, 73, 18, 111, 108, 13, 81, 69, 84, 127, 105, 38, 10, 68, 62, 25, 135, 51, 91, 53, 8, 64, 9, 240, 152, 105, 150, 163, 14, 57, 50, 11, 91, 302, 102, 338, 119, 53, 157, 142, 65, 31, 21, 36, 72, 20, 79, 35, 26, 158, 30, 10, 24, 92, 110, 108, 189, 32, 141, 27, 107, 89, 16, 67, 129, 42, 71, 141, 39, 130, 163, 52, 33, 121, 175, 21, 60, 64, 9, 79, 94, 67, 167, 49, 34, 203, 70, 59, 25, 149, 20, 71, 137, 102, 85, 89, 57, 42, 94, 55, 131, 49, 6, 204, 37, 166, 149, 63, 111, 85, 453, 122, 96, 201, 128, 70, 154, 25, 120, 81, 10, 65, 41, 34, 35, 120, 24, 87, 72, 160, 159, 278, 107, 128, 106, 83, 43, 23, 52, 193, 123, 97, 267, 146, 114, 74, 139, 12, 131, 35, 101, 42, 21, 94, 68, 44, 24, 6, 6, 271, 163, 65, 103, 68, 7, 32, 136, 6, 75, 62, 25, 43, 27, 152, 40, 93, 74, 13, 71, 120, 104, 71, 51, 6, 106, 53, 106, 123, 79, 138, 36, 19, 122, 30, 20, 133, 39, 126, 72, 17, 94, 159, 30, 87, 46, 12, 9, 92, 4, 56, 85, 46, 108, 26, 15, 132, 101, 13, 34, 16, 201, 53, 105, 20, 117, 58, 41, 79, 57, 48, 118, 117, 12, 173, 194, 83, 127, 25, 12, 40, 962, 78, 19, 72, 80, 163, 30, 6, 52]}, "baseline": {"name": "chosen", "wins": 269, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+ppo_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 48901, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+ppo_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+ppo_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama7b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 15:58:16.718852", "total": 512, "seed": 0, "exp_name": "archangel_sft+ppo_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 247, "lengths": [17, 98, 68, 62, 144, 40, 96, 63, 153, 64, 71, 95, 54, 134, 82, 60, 335, 40, 72, 117, 121, 12, 38, 15, 60, 181, 6, 80, 21, 35, 80, 134, 158, 89, 120, 106, 50, 51, 100, 123, 48, 17, 54, 145, 217, 70, 118, 134, 138, 28, 13, 93, 94, 167, 201, 62, 129, 176, 5, 159, 50, 26, 162, 26, 86, 61, 124, 109, 18, 78, 64, 52, 59, 99, 54, 13, 294, 42, 101, 23, 59, 21, 96, 33, 36, 126, 49, 218, 134, 14, 36, 21, 9, 6, 6, 126, 180, 117, 15, 38, 54, 30, 39, 41, 186, 78, 66, 99, 119, 32, 12, 49, 232, 69, 16, 85, 27, 89, 15, 10, 23, 149, 117, 56, 111, 98, 68, 47, 50, 86, 103, 21, 381, 94, 50, 41, 38, 139, 126, 214, 104, 56, 121, 172, 874, 156, 57, 109, 337, 66, 161, 131, 423, 142, 64, 5, 564, 225, 131, 50, 101, 98, 163, 41, 183, 48, 11, 82, 7, 169, 37, 19, 33, 161, 254, 11, 6, 42, 227, 115, 59, 125, 29, 130, 307, 51, 7, 116, 45, 162, 73, 6, 39, 92, 117, 11, 99, 55, 172, 183, 47, 17, 76, 186, 131, 24, 80, 227, 117, 145, 39, 48, 174, 588, 82, 24, 50, 47, 407, 164, 10, 145, 63, 195, 4, 132, 68, 20, 85, 116, 35, 180, 164, 20, 74, 67, 61, 19, 170, 72, 189, 79, 93, 34, 45, 141, 44, 4, 118, 336, 63, 68, 266, 78, 11, 82, 6, 12, 155, 98, 41, 32, 87, 84, 134, 19, 77, 77, 12, 102, 108, 45, 77, 60, 123, 10, 82, 60, 87, 65, 157, 189, 182, 12, 60, 59, 239, 51, 92, 92, 45, 11, 145, 47, 15, 123, 236, 94, 262, 52, 122, 233, 67, 42, 229, 31, 20, 54, 84, 161, 22, 4, 195, 74, 12, 74, 64, 217, 187, 14, 41, 34, 37, 41, 45, 49, 91, 230, 39, 86, 31, 48, 75, 30, 241, 127, 155, 68, 27, 238, 56, 85, 81, 103, 56, 144, 53, 46, 129, 22, 17, 44, 130, 154, 254, 13, 92, 16, 68, 110, 67, 158, 91, 394, 54, 41, 343, 34, 63, 105, 31, 83, 105, 20, 191, 32, 155, 86, 93, 181, 58, 27, 124, 6, 108, 62, 19, 77, 125, 20, 53, 103, 157, 247, 256, 69, 41, 102, 37, 96, 26, 41, 70, 65, 28, 49, 78, 144, 130, 90, 53, 10, 20, 74, 36, 6, 86, 41, 60, 44, 6, 24, 42, 80, 88, 99, 86, 11, 62, 26, 4, 97, 16, 12, 111, 13, 61, 31, 111, 117, 11, 370, 122, 62, 68, 26, 5, 135, 354, 31, 213, 82, 98, 13, 34, 50, 78, 98, 115, 61, 15, 20, 78, 39, 62, 31, 134, 84, 56, 60, 53, 11, 89, 144, 73, 67, 41, 57, 65, 110, 124, 49, 54, 256, 37, 67, 48, 46, 27, 160, 96, 18, 22, 125, 116, 10, 31, 589, 78, 43, 44, 29, 85, 10, 76, 65, 85, 79, 53, 43, 15, 9]}, "baseline": {"name": "chosen", "wins": 236, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+ppo_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 40151, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+ppo_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+ppo_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama13b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 16:14:09.109198", "total": 512, "seed": 0, "exp_name": "archangel_sft+ppo_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 259, "lengths": [55, 136, 54, 5, 30, 78, 91, 40, 266, 86, 79, 169, 86, 89, 143, 11, 107, 36, 75, 43, 38, 28, 10, 5, 30, 16, 6, 414, 88, 46, 23, 220, 273, 195, 26, 86, 32, 206, 27, 167, 134, 83, 125, 164, 515, 17, 162, 123, 397, 20, 59, 112, 107, 184, 163, 18, 373, 333, 6, 267, 69, 11, 22, 17, 206, 11, 52, 40, 100, 61, 20, 9, 67, 73, 57, 66, 67, 53, 86, 8, 46, 75, 84, 9, 6, 87, 69, 301, 106, 38, 97, 76, 6, 6, 25, 106, 84, 116, 6, 27, 12, 20, 97, 160, 19, 135, 88, 583, 91, 80, 11, 16, 149, 648, 44, 25, 41, 120, 32, 30, 40, 35, 225, 65, 185, 87, 196, 12, 6, 65, 56, 47, 210, 69, 135, 104, 24, 59, 83, 106, 151, 51, 92, 116, 775, 264, 38, 405, 98, 211, 60, 80, 57, 25, 16, 6, 22, 29, 117, 148, 53, 521, 73, 161, 170, 56, 126, 143, 3, 122, 127, 55, 99, 112, 121, 18, 98, 26, 236, 292, 254, 46, 13, 77, 207, 41, 14, 37, 56, 94, 32, 6, 22, 75, 238, 81, 31, 12, 236, 108, 141, 18, 159, 196, 99, 35, 46, 171, 39, 108, 32, 26, 80, 29, 56, 17, 110, 53, 485, 157, 15, 134, 50, 199, 9, 266, 64, 9, 105, 81, 20, 175, 117, 20, 64, 78, 116, 31, 194, 51, 43, 7, 11, 98, 34, 105, 93, 50, 22, 272, 118, 127, 76, 78, 6, 248, 6, 52, 61, 175, 27, 26, 158, 74, 46, 16, 53, 114, 16, 92, 46, 71, 126, 56, 240, 24, 74, 71, 19, 80, 65, 69, 210, 15, 77, 240, 59, 21, 135, 200, 73, 5, 45, 119, 42, 68, 166, 78, 30, 169, 99, 106, 57, 318, 492, 30, 49, 119, 283, 140, 17, 7, 139, 31, 18, 40, 35, 261, 152, 159, 27, 47, 16, 112, 20, 13, 41, 155, 22, 88, 37, 151, 207, 77, 141, 312, 297, 179, 82, 78, 62, 3, 50, 29, 16, 292, 13, 18, 173, 39, 60, 42, 183, 220, 144, 22, 59, 99, 29, 87, 16, 76, 129, 342, 23, 6, 245, 59, 104, 227, 20, 74, 49, 47, 261, 53, 100, 67, 36, 151, 91, 132, 23, 6, 59, 45, 27, 252, 23, 33, 72, 165, 105, 263, 345, 81, 76, 153, 8, 142, 85, 19, 31, 257, 95, 95, 230, 93, 251, 58, 32, 43, 17, 54, 14, 4, 64, 29, 16, 23, 6, 6, 65, 117, 93, 61, 32, 12, 35, 125, 229, 54, 31, 36, 20, 6, 114, 63, 206, 112, 11, 554, 16, 38, 80, 12, 11, 66, 195, 110, 188, 46, 159, 18, 51, 120, 74, 9, 184, 82, 29, 25, 127, 152, 141, 49, 140, 148, 29, 4, 51, 6, 77, 29, 8, 66, 24, 26, 70, 221, 54, 43, 34, 42, 37, 20, 129, 46, 22, 35, 22, 133, 15, 55, 237, 42, 99, 134, 200, 130, 44, 14, 16, 4, 93, 92, 58, 51, 32, 29, 6, 37]}, "baseline": {"name": "chosen", "wins": 218, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+ppo_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 56395, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+ppo_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+ppo_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama30b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "ppo", "ppo_epochs": 1, "cliprange": 0.5, "trainer": "PPOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "lam": 0.95, "gamma": 0.99, "critic_coef": 0.01, "KL_coef": 0.1, "use_reference_model": true}}}
{"date": "2024-01-08 16:30:40.096733", "total": 512, "seed": 0, "exp_name": "archangel_sft+slic_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 39, "lengths": [326, 303, 528, 595, 265, 63, 244, 49, 1153, 628, 802, 956, 270, 1153, 595, 519, 234, 185, 192, 1153, 151, 381, 208, 311, 1153, 492, 55, 331, 814, 223, 955, 333, 502, 830, 222, 830, 830, 768, 830, 121, 680, 587, 359, 830, 573, 221, 229, 830, 358, 489, 171, 388, 830, 553, 150, 419, 259, 148, 68, 830, 186, 830, 830, 830, 293, 507, 407, 1522, 1522, 1521, 627, 249, 97, 975, 270, 422, 1407, 189, 512, 1522, 740, 740, 109, 179, 545, 724, 344, 271, 1522, 502, 1522, 865, 130, 58, 835, 432, 213, 363, 199, 270, 266, 124, 293, 1232, 1232, 1232, 1232, 133, 1230, 177, 173, 50, 141, 876, 600, 67, 198, 375, 167, 318, 267, 595, 578, 219, 362, 817, 479, 181, 1024, 878, 237, 1023, 1024, 1024, 386, 129, 606, 206, 900, 338, 77, 115, 322, 302, 1024, 790, 367, 224, 313, 1024, 143, 738, 1024, 119, 367, 44, 249, 339, 1024, 234, 830, 707, 830, 830, 827, 377, 244, 301, 115, 668, 150, 830, 249, 337, 830, 759, 582, 356, 234, 110, 408, 821, 829, 712, 829, 286, 88, 600, 830, 607, 830, 806, 1622, 86, 705, 77, 1622, 1622, 845, 380, 113, 325, 82, 1053, 503, 66, 421, 942, 116, 665, 819, 188, 1622, 246, 1622, 994, 249, 180, 398, 951, 360, 1622, 1622, 1080, 322, 129, 1125, 835, 260, 186, 1125, 356, 1120, 176, 90, 1124, 187, 260, 194, 691, 1125, 134, 375, 1125, 1125, 143, 211, 1125, 86, 1125, 186, 91, 216, 225, 325, 766, 17, 463, 136, 463, 163, 462, 463, 247, 463, 231, 463, 463, 445, 444, 463, 176, 177, 359, 454, 463, 323, 463, 249, 463, 202, 118, 463, 463, 391, 156, 463, 196, 263, 327, 603, 97, 262, 552, 25, 125, 319, 96, 1198, 1074, 1301, 559, 396, 546, 1340, 212, 1340, 1340, 386, 1340, 39, 394, 211, 917, 361, 356, 581, 471, 1340, 239, 1434, 459, 171, 259, 1433, 391, 292, 513, 95, 322, 894, 514, 355, 1434, 273, 414, 436, 889, 423, 1258, 159, 227, 112, 121, 62, 243, 323, 168, 266, 325, 1434, 99, 615, 607, 853, 777, 462, 205, 61, 170, 855, 398, 854, 106, 855, 9, 275, 317, 243, 855, 70, 855, 855, 379, 855, 97, 795, 149, 172, 136, 855, 855, 437, 98, 1031, 742, 1031, 962, 470, 428, 514, 276, 391, 413, 1031, 574, 355, 221, 916, 1031, 56, 463, 251, 945, 304, 401, 1031, 135, 1031, 147, 161, 1031, 274, 436, 1031, 149, 265, 1289, 645, 1289, 23, 120, 388, 726, 931, 629, 245, 3, 246, 215, 545, 578, 719, 1289, 80, 65, 1289, 512, 537, 368, 574, 1289, 870, 1289, 591, 203, 338, 347, 1638, 258, 1243, 1452, 320, 648, 278, 582, 666, 190, 573, 1638, 106, 245, 585, 200, 627, 66, 401, 458, 324, 59, 514, 33, 430, 100, 82, 279, 1096, 146, 905, 379, 347, 34, 191, 384, 65, 199, 1102, 328, 156, 190, 306, 636, 177, 639, 271, 186, 1102, 1102, 395, 789, 254, 150, 150, 56, 228, 248, 552, 779, 140, 240, 1102, 272]}, "baseline": {"name": "chosen", "wins": 455, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+slic_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 40767, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+slic_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+slic_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia1-4b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 16:47:08.008856", "total": 512, "seed": 0, "exp_name": "archangel_sft+slic_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 70, "lengths": [352, 773, 614, 1152, 286, 1113, 128, 33, 1153, 1153, 166, 332, 344, 162, 691, 1153, 230, 77, 185, 1153, 1153, 146, 220, 1147, 1153, 1152, 5, 856, 223, 544, 1153, 510, 830, 830, 830, 298, 295, 830, 609, 546, 269, 829, 830, 338, 830, 192, 683, 89, 830, 830, 189, 454, 172, 384, 829, 114, 830, 155, 82, 558, 174, 830, 192, 680, 401, 1118, 287, 387, 177, 484, 126, 821, 1132, 1522, 1135, 454, 365, 558, 199, 20, 102, 82, 536, 952, 1522, 1352, 208, 997, 307, 579, 518, 365, 14, 68, 747, 320, 134, 192, 9, 92, 153, 920, 1232, 1232, 686, 388, 1218, 134, 1040, 362, 625, 67, 408, 406, 471, 739, 460, 527, 145, 1232, 246, 537, 1232, 411, 1232, 1232, 117, 1232, 32, 1024, 193, 297, 1023, 1024, 364, 1024, 125, 1001, 1024, 603, 1024, 199, 122, 764, 1024, 1024, 650, 479, 84, 242, 1023, 336, 544, 104, 135, 27, 116, 278, 1024, 1024, 824, 627, 830, 147, 829, 603, 200, 596, 115, 830, 830, 87, 830, 342, 676, 830, 830, 23, 830, 337, 830, 112, 453, 830, 823, 42, 79, 419, 796, 663, 830, 17, 106, 198, 1622, 14, 255, 267, 1622, 118, 393, 156, 701, 322, 643, 224, 1622, 615, 155, 577, 761, 151, 1622, 795, 979, 605, 1622, 194, 393, 342, 762, 1238, 159, 257, 1124, 529, 1125, 92, 1125, 378, 803, 1125, 125, 258, 1125, 483, 135, 361, 197, 691, 1125, 268, 293, 575, 89, 192, 289, 646, 1125, 1125, 577, 150, 67, 390, 13, 235, 4, 463, 463, 175, 463, 286, 176, 207, 463, 463, 167, 386, 237, 463, 153, 190, 252, 463, 463, 268, 463, 442, 463, 398, 234, 365, 463, 400, 463, 228, 328, 170, 273, 1339, 301, 667, 960, 1340, 476, 571, 182, 91, 584, 1342, 1340, 1074, 217, 149, 1340, 108, 84, 347, 510, 720, 439, 101, 696, 410, 452, 50, 1340, 1340, 208, 206, 121, 1434, 270, 484, 181, 314, 1278, 413, 255, 170, 119, 1434, 1434, 645, 883, 909, 532, 93, 462, 1297, 154, 1434, 365, 1434, 283, 673, 235, 501, 1434, 1433, 402, 165, 169, 115, 207, 290, 855, 81, 855, 855, 855, 297, 792, 90, 738, 23, 411, 34, 855, 855, 25, 182, 621, 211, 855, 855, 855, 587, 160, 136, 772, 120, 293, 367, 34, 29, 1031, 907, 166, 118, 429, 953, 1030, 233, 480, 771, 148, 1030, 210, 1031, 295, 216, 914, 91, 586, 122, 1031, 284, 469, 455, 217, 435, 562, 79, 252, 39, 685, 663, 271, 88, 10, 1289, 401, 72, 1289, 1289, 1289, 3, 255, 146, 345, 758, 208, 194, 603, 40, 47, 217, 77, 530, 353, 1289, 170, 49, 108, 251, 1131, 124, 112, 1638, 1638, 400, 1638, 282, 81, 1638, 846, 85, 356, 630, 879, 100, 917, 1638, 282, 1638, 1638, 758, 99, 724, 1639, 4, 45, 76, 753, 279, 1638, 1239, 127, 1638, 212, 95, 226, 1102, 354, 530, 568, 350, 296, 1011, 835, 544, 1102, 158, 503, 77, 711, 1102, 465, 522, 1000, 287, 238, 64, 1102, 1102, 179, 619, 241, 147, 12, 461]}, "baseline": {"name": "chosen", "wins": 420, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+slic_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 35963, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+slic_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+slic_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia2-8b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 17:03:14.066442", "total": 512, "seed": 0, "exp_name": "archangel_sft+slic_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 93, "lengths": [354, 231, 499, 221, 64, 408, 132, 167, 1153, 1153, 476, 417, 108, 1153, 49, 817, 448, 27, 517, 836, 1153, 870, 808, 343, 581, 1153, 4, 173, 152, 336, 203, 535, 830, 830, 830, 303, 86, 109, 70, 330, 143, 830, 156, 830, 830, 600, 94, 114, 830, 157, 668, 828, 114, 367, 288, 275, 830, 795, 6, 469, 392, 830, 353, 830, 307, 48, 410, 373, 1228, 383, 134, 871, 1522, 641, 212, 223, 564, 183, 1522, 1004, 296, 338, 102, 171, 92, 317, 213, 52, 1522, 539, 178, 551, 207, 56, 537, 304, 530, 156, 1117, 155, 203, 166, 422, 126, 84, 856, 444, 96, 930, 478, 21, 12, 632, 184, 43, 488, 269, 122, 85, 545, 242, 392, 1232, 101, 472, 431, 241, 590, 933, 952, 111, 591, 1024, 777, 258, 352, 129, 719, 216, 49, 309, 1024, 497, 816, 1024, 1024, 235, 862, 446, 9, 424, 451, 1024, 611, 882, 321, 896, 464, 181, 1024, 87, 830, 830, 56, 830, 354, 61, 99, 14, 230, 96, 208, 107, 269, 829, 253, 81, 268, 830, 250, 830, 830, 47, 741, 830, 76, 4, 126, 284, 72, 830, 19, 89, 178, 87, 1622, 1622, 224, 648, 127, 101, 1164, 314, 417, 901, 87, 120, 261, 46, 155, 378, 176, 66, 159, 1622, 82, 310, 155, 1337, 108, 423, 1622, 386, 420, 34, 367, 282, 356, 1125, 109, 124, 1125, 228, 64, 85, 175, 147, 366, 1126, 67, 284, 81, 889, 161, 35, 1125, 1125, 108, 81, 1068, 102, 64, 462, 226, 1125, 1125, 4, 270, 134, 335, 362, 160, 463, 463, 463, 290, 59, 394, 175, 463, 69, 384, 463, 124, 287, 248, 133, 449, 463, 297, 173, 190, 20, 115, 200, 463, 351, 101, 694, 231, 132, 352, 46, 65, 952, 498, 342, 104, 1339, 147, 51, 411, 377, 175, 1340, 1340, 179, 204, 1340, 184, 15, 16, 264, 83, 119, 55, 1340, 1340, 1340, 106, 80, 251, 1434, 990, 168, 52, 110, 329, 241, 156, 307, 471, 256, 352, 159, 671, 1038, 1434, 226, 107, 143, 1237, 85, 128, 205, 87, 1434, 533, 370, 1434, 871, 86, 150, 90, 503, 76, 743, 74, 855, 193, 296, 380, 782, 855, 855, 428, 421, 108, 740, 855, 99, 223, 119, 274, 125, 71, 132, 132, 149, 854, 49, 232, 81, 14, 182, 49, 203, 475, 567, 129, 396, 359, 1031, 359, 90, 1031, 298, 1031, 50, 225, 663, 377, 199, 1030, 451, 951, 936, 204, 1031, 269, 96, 1031, 125, 757, 1031, 1031, 351, 136, 271, 1289, 11, 12, 465, 242, 217, 1289, 94, 103, 301, 271, 300, 300, 661, 533, 620, 162, 837, 1289, 1174, 395, 51, 1289, 284, 37, 1289, 665, 1289, 321, 1638, 281, 262, 302, 1638, 1636, 111, 633, 549, 1638, 76, 487, 1638, 123, 95, 246, 795, 1624, 185, 314, 686, 185, 139, 1634, 20, 620, 64, 1638, 240, 71, 511, 214, 173, 56, 73, 1102, 1076, 229, 488, 1102, 169, 31, 174, 799, 458, 109, 136, 178, 192, 85, 284, 81, 74, 34, 165, 304, 214, 374, 280, 36, 626, 250, 1102, 228]}, "baseline": {"name": "chosen", "wins": 396, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+slic_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 43925, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+slic_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+slic_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia6-9b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 17:19:49.588660", "total": 512, "seed": 0, "exp_name": "archangel_sft+slic_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 94, "lengths": [33, 212, 99, 685, 295, 180, 140, 57, 163, 70, 373, 1153, 246, 21, 372, 619, 421, 442, 48, 249, 198, 214, 256, 699, 81, 85, 6, 622, 269, 318, 258, 503, 321, 576, 43, 100, 765, 155, 830, 354, 64, 333, 588, 117, 105, 349, 830, 119, 830, 830, 830, 306, 81, 830, 830, 619, 830, 20, 13, 830, 580, 829, 830, 674, 1344, 825, 970, 308, 96, 385, 89, 1522, 76, 1522, 555, 239, 1522, 15, 888, 1522, 15, 293, 221, 1069, 1374, 354, 337, 273, 75, 190, 336, 31, 521, 8, 150, 95, 258, 96, 60, 324, 1232, 65, 1232, 242, 13, 205, 717, 105, 370, 1232, 4, 132, 79, 35, 36, 104, 200, 42, 33, 90, 1232, 496, 1232, 144, 355, 114, 382, 1232, 1023, 1024, 68, 1024, 48, 60, 299, 1024, 626, 67, 217, 1024, 201, 175, 182, 153, 1024, 178, 199, 1024, 185, 15, 112, 1024, 347, 1024, 328, 6, 316, 111, 134, 242, 69, 104, 114, 130, 371, 830, 55, 185, 358, 516, 235, 196, 830, 468, 180, 53, 161, 145, 581, 830, 307, 50, 59, 830, 830, 727, 830, 86, 75, 241, 830, 830, 166, 32, 1137, 53, 139, 190, 1622, 158, 185, 1622, 46, 682, 493, 44, 1622, 361, 156, 732, 315, 92, 61, 291, 14, 743, 76, 191, 217, 576, 120, 264, 53, 277, 142, 267, 87, 42, 200, 107, 48, 1125, 356, 144, 107, 91, 144, 1125, 58, 96, 1124, 77, 154, 231, 473, 173, 121, 1125, 99, 1125, 1125, 56, 90, 283, 59, 241, 4, 57, 183, 320, 189, 65, 461, 139, 463, 463, 72, 463, 37, 124, 463, 56, 463, 69, 119, 463, 301, 463, 463, 364, 51, 463, 213, 453, 463, 463, 123, 462, 710, 107, 148, 16, 1038, 86, 206, 26, 1340, 99, 260, 426, 1340, 430, 111, 41, 361, 87, 398, 86, 633, 478, 1340, 1340, 197, 1340, 1340, 1340, 249, 229, 161, 162, 40, 1434, 16, 624, 1434, 595, 49, 102, 83, 175, 970, 1434, 183, 307, 567, 475, 1287, 185, 1434, 183, 356, 192, 299, 556, 4, 416, 3, 71, 875, 432, 555, 141, 43, 69, 283, 175, 709, 855, 32, 855, 142, 194, 88, 855, 191, 137, 787, 28, 855, 254, 146, 855, 62, 72, 855, 224, 130, 290, 27, 855, 22, 192, 219, 18, 831, 19, 141, 196, 22, 174, 1031, 290, 1031, 41, 559, 69, 424, 58, 1031, 1031, 468, 52, 150, 82, 536, 1031, 0, 133, 426, 452, 162, 624, 1031, 1031, 482, 128, 132, 85, 124, 241, 6, 18, 199, 1289, 872, 34, 758, 1007, 306, 460, 1054, 214, 1021, 1289, 64, 724, 25, 482, 62, 252, 194, 107, 508, 138, 187, 184, 15, 405, 221, 616, 1008, 236, 357, 1638, 156, 91, 1413, 380, 381, 64, 646, 328, 60, 623, 1533, 148, 186, 227, 139, 177, 312, 444, 36, 1638, 95, 186, 526, 129, 56, 311, 106, 170, 1101, 629, 191, 338, 146, 293, 451, 49, 280, 843, 369, 699, 59, 1102, 350, 125, 795, 903, 1102, 209, 1102, 146, 36, 91, 1011, 1102, 560, 23, 7, 29]}, "baseline": {"name": "chosen", "wins": 395, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+slic_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 34369, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+slic_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+slic_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia12-0b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 17:35:17.185270", "total": 512, "seed": 0, "exp_name": "archangel_sft+slic_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 195, "lengths": [275, 120, 105, 101, 257, 156, 126, 89, 504, 294, 281, 203, 91, 267, 82, 123, 134, 189, 207, 81, 126, 70, 227, 63, 193, 190, 113, 384, 109, 147, 91, 132, 205, 164, 71, 180, 68, 198, 92, 161, 283, 185, 89, 88, 301, 164, 232, 97, 283, 82, 65, 369, 71, 210, 111, 130, 742, 140, 86, 229, 167, 358, 180, 95, 228, 189, 66, 453, 200, 222, 248, 139, 234, 195, 112, 88, 175, 81, 165, 79, 98, 68, 80, 251, 67, 170, 119, 442, 137, 273, 170, 347, 92, 121, 64, 337, 217, 125, 160, 70, 93, 169, 215, 416, 103, 500, 290, 228, 110, 87, 33, 82, 177, 228, 112, 321, 122, 145, 71, 163, 103, 258, 544, 271, 223, 101, 647, 50, 670, 837, 158, 206, 1025, 158, 230, 100, 28, 294, 461, 170, 204, 84, 129, 210, 618, 161, 132, 248, 482, 149, 115, 129, 384, 124, 149, 333, 161, 184, 225, 135, 653, 203, 206, 179, 479, 813, 243, 625, 468, 179, 231, 118, 185, 303, 256, 60, 192, 47, 423, 155, 149, 64, 69, 812, 138, 70, 187, 300, 814, 151, 92, 60, 44, 232, 360, 100, 131, 195, 305, 261, 140, 111, 297, 163, 370, 240, 208, 128, 83, 71, 66, 85, 117, 202, 195, 124, 139, 94, 233, 101, 64, 283, 111, 526, 89, 290, 223, 143, 66, 115, 148, 213, 563, 191, 134, 263, 131, 362, 283, 102, 66, 181, 147, 111, 118, 102, 218, 81, 100, 508, 192, 119, 123, 179, 144, 149, 11, 74, 77, 207, 102, 122, 240, 109, 197, 70, 240, 195, 45, 158, 240, 240, 65, 76, 240, 102, 133, 240, 27, 112, 156, 134, 188, 91, 77, 103, 240, 142, 128, 171, 134, 75, 149, 100, 157, 335, 291, 245, 678, 125, 123, 245, 780, 82, 622, 35, 74, 160, 382, 680, 62, 58, 288, 293, 257, 46, 123, 229, 95, 128, 87, 114, 170, 259, 55, 151, 237, 1374, 85, 91, 115, 489, 284, 109, 115, 204, 1375, 256, 101, 245, 195, 81, 160, 117, 100, 440, 88, 284, 204, 109, 158, 53, 682, 219, 323, 69, 162, 72, 234, 81, 156, 169, 76, 124, 683, 119, 229, 74, 162, 683, 278, 134, 107, 108, 269, 82, 407, 122, 225, 115, 652, 87, 258, 59, 84, 176, 100, 190, 317, 110, 609, 164, 163, 403, 441, 234, 242, 525, 235, 166, 208, 76, 438, 599, 236, 915, 297, 110, 567, 270, 121, 129, 31, 110, 165, 914, 307, 52, 107, 82, 58, 44, 244, 398, 303, 242, 873, 97, 76, 89, 168, 100, 127, 90, 69, 54, 201, 167, 668, 106, 168, 619, 520, 294, 105, 114, 83, 90, 616, 256, 159, 221, 754, 177, 185, 174, 181, 92, 127, 115, 276, 172, 149, 144, 99, 118, 236, 87, 151, 136, 162, 115, 121, 256, 172, 111, 48, 163, 145, 107, 102, 69, 72, 328, 180, 252, 102, 963, 160, 161, 318, 310, 107, 167, 328, 183, 110, 162, 229, 449, 76, 41, 122, 90, 368, 230, 210, 98, 152, 74, 126, 143]}, "baseline": {"name": "chosen", "wins": 291, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+slic_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 41313, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+slic_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+slic_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama7b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 17:50:29.533266", "total": 512, "seed": 0, "exp_name": "archangel_sft+slic_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 205, "lengths": [57, 483, 335, 84, 286, 157, 309, 99, 485, 839, 532, 1085, 75, 98, 148, 467, 394, 100, 103, 63, 270, 63, 74, 111, 510, 295, 99, 565, 75, 207, 534, 254, 742, 356, 200, 129, 115, 742, 742, 257, 185, 742, 224, 338, 671, 107, 302, 278, 320, 259, 102, 300, 622, 330, 139, 79, 622, 425, 97, 189, 193, 67, 120, 53, 130, 130, 145, 96, 151, 200, 73, 104, 208, 262, 72, 34, 202, 92, 95, 32, 63, 231, 96, 240, 88, 204, 129, 227, 121, 116, 173, 175, 75, 28, 729, 276, 367, 63, 48, 93, 63, 779, 247, 96, 143, 506, 184, 156, 105, 87, 17, 86, 133, 337, 76, 137, 119, 225, 78, 326, 55, 336, 400, 84, 190, 129, 573, 85, 343, 160, 191, 940, 371, 1025, 591, 177, 58, 98, 999, 178, 136, 78, 240, 373, 1026, 238, 226, 94, 484, 188, 190, 101, 371, 408, 108, 12, 184, 136, 182, 117, 144, 814, 814, 90, 189, 97, 639, 185, 218, 473, 122, 61, 772, 562, 258, 51, 174, 41, 516, 422, 207, 107, 37, 814, 217, 74, 54, 102, 155, 86, 173, 99, 349, 402, 288, 121, 129, 151, 235, 114, 187, 100, 247, 299, 340, 36, 150, 287, 87, 333, 134, 99, 319, 145, 471, 216, 114, 69, 337, 612, 47, 729, 348, 487, 116, 217, 191, 107, 94, 109, 614, 278, 193, 159, 132, 146, 248, 385, 415, 106, 145, 89, 136, 76, 77, 131, 463, 85, 112, 352, 239, 360, 128, 397, 108, 119, 14, 240, 197, 240, 75, 74, 203, 157, 102, 97, 112, 122, 58, 101, 115, 48, 126, 99, 240, 240, 134, 240, 240, 159, 240, 168, 240, 32, 100, 240, 239, 188, 121, 187, 340, 58, 1283, 276, 133, 122, 298, 84, 558, 382, 212, 161, 159, 227, 274, 34, 83, 127, 188, 104, 354, 83, 258, 581, 54, 38, 165, 171, 176, 146, 156, 107, 132, 185, 43, 49, 153, 317, 549, 139, 205, 487, 134, 309, 242, 376, 551, 129, 153, 411, 92, 193, 77, 154, 289, 178, 115, 63, 335, 82, 89, 54, 343, 196, 174, 293, 141, 237, 85, 178, 102, 106, 113, 183, 53, 223, 220, 173, 89, 682, 212, 131, 118, 66, 229, 148, 544, 119, 299, 253, 251, 141, 180, 75, 94, 257, 45, 506, 229, 99, 915, 202, 238, 178, 334, 273, 228, 241, 60, 481, 284, 67, 85, 424, 915, 381, 877, 134, 382, 238, 177, 184, 37, 263, 263, 51, 258, 141, 97, 133, 101, 89, 310, 590, 269, 164, 135, 157, 72, 141, 477, 179, 151, 153, 92, 163, 93, 333, 905, 73, 69, 434, 174, 91, 290, 169, 25, 236, 550, 319, 212, 76, 270, 53, 128, 154, 121, 99, 474, 129, 332, 112, 138, 502, 174, 460, 188, 148, 176, 79, 335, 70, 54, 168, 62, 84, 226, 369, 112, 287, 421, 87, 61, 231, 64, 123, 52, 120, 94, 693, 124, 515, 208, 168, 962, 132, 193, 330, 159, 554, 191, 153, 99, 179, 304, 195, 218, 80, 140, 91, 4, 598]}, "baseline": {"name": "chosen", "wins": 284, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+slic_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 36969, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+slic_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+slic_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama13b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 18:06:06.123837", "total": 512, "seed": 0, "exp_name": "archangel_sft+slic_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 192, "lengths": [62, 139, 112, 115, 134, 167, 141, 161, 1280, 1460, 375, 320, 120, 178, 528, 1459, 1086, 79, 1086, 587, 216, 189, 77, 1086, 199, 167, 86, 89, 147, 117, 208, 208, 463, 270, 474, 202, 106, 151, 610, 245, 1404, 156, 167, 117, 292, 172, 908, 294, 278, 305, 100, 132, 136, 742, 161, 198, 234, 262, 229, 742, 207, 85, 121, 98, 177, 1511, 450, 183, 953, 197, 175, 1511, 162, 283, 154, 68, 194, 206, 242, 126, 208, 1464, 132, 309, 117, 194, 127, 655, 259, 100, 1463, 802, 166, 69, 435, 119, 142, 77, 290, 75, 47, 94, 215, 300, 996, 289, 154, 997, 177, 115, 678, 205, 240, 531, 144, 101, 516, 205, 100, 148, 192, 119, 348, 518, 190, 294, 346, 119, 168, 371, 74, 268, 283, 138, 164, 360, 47, 237, 337, 253, 194, 161, 173, 954, 1026, 327, 100, 339, 180, 263, 185, 166, 607, 290, 105, 1025, 104, 243, 267, 428, 161, 814, 395, 119, 814, 403, 223, 195, 458, 157, 163, 201, 340, 169, 109, 515, 174, 81, 533, 303, 160, 94, 72, 211, 186, 897, 111, 381, 99, 91, 214, 153, 326, 165, 216, 174, 1590, 215, 943, 149, 201, 496, 132, 308, 404, 164, 836, 223, 110, 121, 180, 816, 79, 290, 190, 180, 256, 102, 270, 1128, 152, 185, 102, 237, 142, 144, 220, 90, 319, 149, 471, 547, 1025, 122, 141, 174, 120, 1026, 1024, 173, 105, 148, 484, 195, 141, 327, 167, 482, 1289, 193, 464, 1288, 256, 208, 43, 241, 90, 84, 115, 204, 1357, 114, 184, 237, 1358, 97, 173, 111, 60, 128, 100, 143, 126, 111, 240, 239, 240, 196, 214, 152, 240, 240, 240, 228, 209, 234, 35, 240, 530, 123, 147, 55, 144, 119, 66, 207, 334, 241, 157, 164, 218, 189, 100, 874, 141, 1282, 459, 129, 151, 692, 83, 1283, 211, 413, 112, 322, 177, 330, 182, 1283, 270, 346, 98, 167, 54, 242, 1375, 261, 94, 160, 481, 1375, 240, 140, 356, 626, 222, 307, 137, 419, 1602, 170, 156, 261, 432, 137, 95, 60, 534, 97, 266, 55, 175, 149, 264, 107, 308, 93, 121, 115, 95, 100, 262, 145, 154, 233, 241, 683, 270, 483, 443, 148, 531, 112, 328, 1443, 523, 144, 804, 291, 214, 165, 184, 293, 131, 309, 89, 152, 189, 130, 849, 107, 201, 262, 306, 441, 569, 317, 651, 217, 219, 84, 117, 477, 107, 915, 915, 165, 210, 101, 176, 725, 97, 204, 138, 90, 355, 113, 309, 241, 130, 1025, 103, 338, 1219, 179, 139, 79, 104, 177, 225, 1220, 491, 1363, 101, 59, 220, 148, 682, 1363, 1363, 922, 340, 92, 122, 105, 112, 124, 270, 1605, 348, 146, 196, 58, 126, 151, 110, 433, 128, 151, 145, 103, 125, 1606, 174, 185, 519, 166, 94, 122, 103, 76, 101, 100, 301, 124, 554, 219, 174, 118, 145, 128, 69, 306, 169, 182, 124, 1528, 139, 234, 107, 293, 167, 142, 374, 1528, 176, 351, 173, 430, 962, 87, 123, 83, 148, 328, 180, 112, 179, 251, 59, 962]}, "baseline": {"name": "chosen", "wins": 301, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+slic_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 50953, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+slic_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+slic_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama30b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 16, "gradient_accumulation_steps": 2, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "slic", "beta": 1.0, "lambda_coef": 0.1, "trainer": "SLiCTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": false}}}
{"date": "2024-01-08 18:21:08.540144", "total": 512, "seed": 0, "exp_name": "archangel_sft+csft_pythia1-4b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 74, "lengths": [44, 51, 14, 96, 36, 13, 55, 58, 125, 10, 88, 65, 259, 15, 29, 18, 180, 48, 37, 50, 59, 253, 13, 28, 124, 54, 138, 312, 45, 51, 67, 151, 55, 489, 63, 42, 129, 47, 47, 41, 32, 130, 12, 26, 289, 43, 788, 70, 95, 8, 120, 108, 17, 160, 36, 45, 384, 237, 817, 83, 87, 47, 184, 129, 78, 44, 68, 105, 157, 30, 247, 63, 39, 15, 29, 7, 201, 21, 426, 46, 14, 13, 25, 154, 512, 182, 46, 534, 510, 24, 10, 78, 17, 31, 109, 106, 73, 19, 14, 49, 11, 11, 47, 180, 16, 393, 158, 40, 43, 30, 5, 42, 41, 54, 100, 87, 85, 86, 32, 81, 27, 48, 30, 21, 49, 85, 177, 86, 29, 28, 30, 28, 212, 126, 26, 60, 19, 21, 75, 59, 639, 18, 94, 90, 1024, 230, 34, 128, 67, 1024, 57, 139, 395, 15, 42, 255, 32, 83, 10, 31, 30, 16, 70, 10, 7, 47, 12, 38, 72, 125, 104, 85, 9, 126, 53, 14, 49, 550, 32, 81, 171, 56, 26, 205, 154, 26, 15, 380, 35, 62, 72, 12, 39, 81, 78, 74, 11, 270, 71, 154, 101, 1230, 76, 121, 9, 30, 33, 37, 81, 51, 88, 36, 148, 107, 180, 36, 10, 41, 229, 149, 19, 57, 38, 177, 7, 7, 90, 18, 17, 58, 96, 202, 75, 15, 30, 226, 35, 289, 81, 86, 10, 102, 108, 9, 53, 133, 77, 133, 15, 63, 22, 31, 105, 143, 162, 43, 4, 12, 34, 35, 21, 12, 192, 17, 5, 84, 53, 24, 13, 29, 463, 179, 165, 103, 100, 118, 277, 85, 10, 43, 101, 48, 108, 36, 26, 409, 230, 24, 1017, 82, 64, 33, 56, 27, 59, 160, 23, 88, 71, 81, 233, 154, 8, 109, 57, 26, 207, 126, 80, 549, 28, 52, 34, 133, 10, 15, 63, 52, 187, 46, 27, 407, 6, 70, 25, 17, 95, 8, 11, 23, 132, 168, 97, 146, 55, 139, 239, 17, 114, 18, 146, 55, 81, 119, 354, 23, 37, 84, 43, 18, 258, 42, 62, 14, 365, 43, 764, 855, 23, 112, 103, 86, 855, 822, 14, 6, 68, 6, 160, 59, 64, 38, 94, 35, 121, 48, 163, 12, 45, 154, 91, 58, 39, 336, 106, 10, 24, 60, 99, 186, 62, 9, 97, 68, 64, 35, 141, 11, 7, 89, 86, 67, 39, 120, 108, 117, 200, 37, 1031, 39, 247, 10, 47, 40, 23, 11, 252, 8, 45, 20, 11, 6, 62, 61, 46, 32, 100, 12, 37, 68, 4, 30, 21, 9, 86, 18, 56, 743, 62, 81, 77, 265, 47, 36, 68, 8, 5, 22, 127, 1637, 163, 57, 781, 15, 35, 45, 248, 77, 68, 346, 34, 46, 32, 92, 135, 84, 84, 110, 47, 42, 101, 4, 56, 70, 22, 93, 16, 45, 128, 118, 23, 67, 12, 109, 44, 19, 85, 410, 14, 137, 35, 50, 14, 91, 1102, 74, 40, 244, 45, 105, 1102, 17, 15, 29, 95, 38, 43, 156, 51, 9, 112, 193]}, "baseline": {"name": "chosen", "wins": 404, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+csft_pythia1-4b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 43803, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+csft_pythia1-4b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+csft_pythia1-4b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-1.4b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia1-4b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 18:36:36.859720", "total": 512, "seed": 0, "exp_name": "archangel_sft+csft_pythia2-8b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 103, "lengths": [7, 92, 105, 36, 69, 247, 20, 15, 655, 18, 114, 107, 50, 15, 21, 23, 294, 48, 63, 125, 30, 22, 18, 210, 88, 145, 49, 24, 99, 37, 31, 117, 96, 174, 8, 79, 45, 24, 69, 164, 46, 139, 16, 40, 64, 83, 7, 73, 79, 19, 250, 830, 89, 44, 65, 21, 353, 61, 9, 101, 52, 43, 134, 148, 307, 44, 6, 212, 27, 34, 29, 9, 109, 328, 8, 57, 280, 28, 247, 14, 20, 60, 57, 39, 54, 60, 96, 8, 98, 39, 51, 55, 12, 9, 44, 242, 40, 22, 12, 15, 28, 37, 247, 37, 13, 19, 26, 183, 48, 50, 33, 17, 43, 45, 33, 34, 33, 21, 38, 34, 57, 70, 529, 46, 61, 74, 111, 50, 10, 17, 27, 43, 1024, 66, 16, 447, 52, 31, 130, 66, 138, 83, 72, 85, 1024, 46, 58, 44, 14, 14, 60, 23, 127, 59, 81, 13, 52, 200, 78, 27, 95, 9, 277, 205, 51, 9, 222, 77, 8, 62, 80, 154, 16, 16, 99, 48, 143, 88, 20, 71, 83, 22, 15, 47, 90, 7, 36, 222, 133, 88, 59, 14, 139, 47, 112, 59, 68, 11, 38, 143, 146, 22, 46, 103, 146, 83, 35, 305, 67, 137, 24, 42, 220, 31, 216, 126, 169, 55, 82, 19, 18, 155, 67, 118, 23, 30, 8, 14, 140, 86, 277, 241, 79, 14, 18, 170, 33, 791, 35, 177, 111, 13, 19, 9, 9, 129, 36, 58, 151, 25, 74, 203, 504, 167, 11, 103, 21, 8, 23, 149, 6, 93, 160, 129, 120, 8, 195, 277, 21, 55, 463, 92, 289, 19, 96, 28, 91, 45, 26, 159, 41, 463, 159, 10, 299, 108, 61, 59, 141, 10, 71, 8, 89, 57, 20, 198, 21, 73, 81, 19, 101, 19, 70, 14, 275, 21, 70, 47, 33, 147, 28, 6, 369, 193, 25, 19, 77, 402, 105, 62, 59, 102, 15, 600, 38, 10, 89, 148, 20, 9, 124, 224, 95, 531, 161, 630, 69, 1434, 27, 41, 19, 14, 62, 26, 615, 63, 43, 76, 40, 26, 49, 27, 19, 52, 51, 20, 177, 75, 81, 30, 19, 29, 36, 628, 8, 85, 31, 68, 97, 94, 34, 62, 183, 58, 217, 91, 98, 42, 45, 8, 19, 106, 137, 12, 116, 66, 12, 28, 94, 29, 15, 28, 150, 13, 176, 121, 43, 60, 12, 323, 13, 36, 186, 106, 21, 35, 43, 121, 153, 65, 32, 22, 18, 69, 22, 3, 65, 5, 93, 21, 16, 3, 8, 125, 674, 76, 50, 9, 56, 316, 34, 163, 17, 8, 35, 22, 23, 11, 74, 67, 4, 26, 124, 28, 92, 82, 7, 111, 355, 47, 239, 30, 90, 12, 13, 49, 44, 59, 115, 73, 66, 17, 58, 22, 179, 17, 43, 65, 14, 22, 143, 28, 33, 19, 54, 99, 17, 9, 103, 35, 44, 62, 83, 185, 66, 192, 85, 33, 259, 25, 126, 39, 15, 77, 82, 50, 15, 146, 276, 1102, 1102, 3, 169, 67, 37, 37, 177, 31, 70, 24, 10, 51]}, "baseline": {"name": "chosen", "wins": 376, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+csft_pythia2-8b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 60263, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+csft_pythia2-8b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+csft_pythia2-8b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-2.8b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia2-8b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 18:51:58.071079", "total": 512, "seed": 0, "exp_name": "archangel_sft+csft_pythia6-9b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 95, "lengths": [21, 85, 32, 9, 21, 76, 48, 45, 482, 219, 362, 43, 66, 15, 9, 23, 222, 38, 64, 15, 36, 63, 12, 50, 392, 22, 7, 39, 45, 71, 36, 156, 86, 377, 51, 41, 25, 40, 11, 42, 10, 659, 69, 16, 43, 82, 328, 123, 29, 10, 54, 46, 169, 395, 116, 143, 590, 21, 6, 45, 75, 115, 77, 34, 67, 105, 13, 103, 22, 231, 85, 16, 158, 250, 16, 48, 117, 21, 49, 11, 26, 32, 79, 44, 8, 199, 48, 9, 93, 40, 100, 116, 34, 11, 11, 130, 27, 52, 7, 21, 13, 3, 118, 113, 27, 445, 112, 100, 3, 28, 44, 72, 36, 20, 62, 25, 53, 22, 53, 130, 91, 15, 227, 73, 18, 68, 56, 17, 53, 99, 58, 38, 117, 77, 97, 15, 13, 38, 29, 71, 58, 79, 85, 164, 1024, 156, 506, 74, 1022, 19, 38, 39, 177, 47, 76, 7, 14, 8, 149, 18, 36, 830, 82, 97, 19, 753, 85, 44, 20, 60, 136, 152, 23, 21, 9, 19, 86, 30, 153, 235, 299, 78, 21, 30, 27, 44, 12, 45, 20, 39, 38, 6, 71, 56, 19, 47, 29, 5, 273, 136, 41, 56, 54, 206, 118, 11, 26, 88, 88, 33, 31, 39, 8, 1621, 95, 86, 129, 122, 93, 49, 22, 135, 70, 154, 29, 58, 36, 37, 24, 91, 60, 153, 106, 95, 96, 65, 32, 401, 89, 34, 46, 4, 133, 122, 10, 158, 142, 6, 16, 34, 111, 9, 117, 48, 135, 227, 5, 11, 14, 104, 13, 35, 67, 6, 28, 76, 119, 152, 13, 39, 217, 102, 104, 83, 96, 91, 70, 71, 32, 316, 153, 53, 89, 21, 276, 50, 8, 20, 113, 16, 17, 14, 104, 131, 29, 134, 467, 146, 61, 91, 87, 31, 81, 1340, 195, 46, 19, 121, 231, 72, 12, 10, 141, 335, 23, 29, 33, 34, 81, 39, 19, 40, 121, 111, 34, 28, 76, 95, 47, 80, 12, 94, 62, 144, 190, 22, 35, 71, 21, 29, 78, 63, 96, 35, 44, 155, 36, 79, 196, 39, 106, 6, 120, 18, 183, 39, 40, 18, 78, 66, 78, 77, 54, 375, 65, 10, 117, 60, 63, 93, 98, 62, 74, 19, 66, 3, 49, 214, 296, 101, 24, 79, 68, 30, 80, 24, 38, 145, 115, 112, 117, 113, 104, 46, 110, 67, 191, 60, 12, 158, 10, 48, 56, 280, 32, 70, 493, 40, 73, 19, 88, 17, 137, 107, 24, 21, 248, 57, 50, 47, 18, 6, 60, 1289, 11, 45, 91, 18, 93, 207, 44, 26, 54, 39, 9, 21, 56, 99, 141, 27, 24, 50, 15, 25, 35, 28, 5, 208, 128, 33, 818, 22, 96, 20, 70, 90, 110, 49, 104, 41, 66, 23, 138, 9, 118, 42, 141, 68, 266, 12, 22, 20, 18, 14, 51, 50, 114, 23, 117, 69, 8, 94, 90, 55, 29, 64, 38, 21, 81, 245, 59, 14, 43, 91, 117, 24, 127, 124, 7, 49, 18, 9, 115, 123, 104, 619, 13, 55, 935, 25, 7, 69]}, "baseline": {"name": "chosen", "wins": 381, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+csft_pythia6-9b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 39793, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+csft_pythia6-9b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+csft_pythia6-9b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-6.9b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia6-9b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 19:07:16.606048", "total": 512, "seed": 0, "exp_name": "archangel_sft+csft_pythia12-0b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 112, "lengths": [61, 86, 41, 30, 67, 14, 83, 310, 440, 16, 98, 16, 33, 28, 179, 908, 26, 31, 28, 19, 42, 694, 12, 38, 80, 143, 3, 78, 77, 134, 184, 149, 216, 132, 55, 46, 17, 19, 42, 69, 48, 72, 22, 117, 23, 138, 91, 67, 10, 11, 17, 68, 70, 53, 35, 11, 830, 229, 6, 60, 132, 16, 24, 77, 115, 10, 9, 100, 67, 35, 158, 41, 18, 49, 78, 131, 526, 15, 120, 85, 10, 37, 71, 83, 17, 250, 61, 53, 60, 192, 7, 51, 49, 33, 17, 65, 19, 63, 56, 20, 13, 8, 80, 50, 33, 348, 18, 45, 1232, 72, 18, 30, 16, 96, 12, 23, 57, 30, 34, 19, 54, 61, 1231, 7, 31, 88, 155, 46, 37, 322, 11, 34, 13, 47, 117, 120, 31, 111, 102, 129, 147, 11, 135, 64, 514, 40, 25, 281, 31, 25, 536, 173, 54, 451, 10, 7, 25, 30, 31, 51, 8, 13, 270, 53, 44, 10, 101, 56, 12, 21, 28, 85, 10, 78, 22, 59, 42, 75, 18, 59, 40, 83, 31, 316, 396, 35, 6, 87, 190, 124, 28, 6, 25, 29, 43, 7, 97, 135, 139, 177, 50, 15, 55, 201, 331, 35, 44, 199, 398, 13, 46, 112, 119, 137, 215, 21, 113, 50, 102, 5, 49, 23, 60, 1622, 65, 52, 47, 30, 182, 135, 64, 26, 156, 34, 21, 150, 10, 237, 29, 31, 467, 42, 206, 54, 8, 108, 38, 50, 22, 20, 122, 61, 192, 863, 6, 152, 6, 13, 65, 44, 13, 12, 99, 109, 463, 70, 30, 76, 5, 102, 8, 189, 30, 337, 51, 10, 53, 120, 40, 73, 59, 70, 98, 14, 67, 233, 68, 53, 129, 401, 123, 18, 114, 136, 60, 38, 389, 15, 3, 67, 154, 58, 29, 49, 49, 46, 36, 29, 142, 34, 37, 11, 428, 146, 39, 31, 68, 72, 110, 11, 114, 55, 37, 126, 8, 37, 141, 51, 19, 323, 112, 165, 47, 56, 181, 62, 52, 32, 3, 192, 49, 143, 103, 30, 37, 118, 40, 49, 71, 69, 97, 14, 19, 26, 179, 58, 358, 22, 51, 44, 112, 57, 27, 85, 15, 25, 30, 37, 143, 28, 10, 65, 175, 63, 855, 87, 78, 33, 152, 141, 37, 75, 48, 60, 194, 19, 83, 64, 35, 16, 783, 53, 422, 61, 585, 17, 116, 34, 10, 39, 17, 23, 17, 174, 20, 28, 11, 512, 193, 13, 18, 174, 19, 75, 16, 11, 55, 32, 40, 27, 18, 6, 32, 1292, 110, 12, 59, 22, 34, 98, 6, 53, 12, 33, 112, 41, 33, 88, 10, 51, 52, 13, 188, 21, 89, 22, 5, 69, 37, 7, 1531, 28, 48, 69, 44, 19, 3, 5, 90, 73, 256, 716, 92, 51, 61, 11, 91, 141, 63, 28, 88, 4, 55, 228, 14, 150, 25, 51, 17, 178, 177, 39, 55, 143, 63, 60, 94, 53, 45, 11, 16, 12, 14, 133, 89, 734, 172, 359, 8, 55, 5, 43, 41, 36, 103, 123, 616, 73, 101, 32, 6, 47]}, "baseline": {"name": "chosen", "wins": 360, "lengths": [10, 64, 50, 86, 177, 98, 33, 98, 631, 160, 134, 26, 98, 175, 57, 86, 87, 18, 112, 138, 157, 20, 5, 33, 144, 61, 6, 127, 46, 129, 24, 49, 53, 251, 171, 10, 35, 69, 58, 113, 37, 132, 52, 238, 24, 53, 255, 98, 1201, 42, 20, 85, 145, 47, 18, 13, 50, 546, 6, 489, 50, 33, 11, 20, 41, 117, 196, 60, 74, 145, 22, 19, 58, 41, 101, 37, 99, 7, 114, 14, 36, 102, 72, 59, 6, 158, 16, 442, 189, 29, 79, 33, 15, 13, 22, 114, 44, 31, 11, 15, 8, 17, 65, 86, 84, 83, 48, 196, 33, 42, 42, 41, 125, 354, 17, 25, 9, 32, 29, 42, 32, 31, 505, 56, 91, 30, 147, 74, 7, 50, 57, 45, 115, 19, 160, 22, 12, 79, 145, 218, 105, 69, 226, 34, 18, 185, 30, 18, 69, 88, 73, 72, 224, 220, 81, 6, 21, 10, 108, 34, 36, 1964, 257, 85, 181, 298, 62, 46, 7, 46, 25, 47, 69, 54, 36, 107, 239, 14, 243, 155, 74, 35, 35, 43, 431, 55, 6, 249, 26, 77, 193, 6, 33, 246, 41, 16, 98, 82, 293, 111, 83, 52, 7, 76, 48, 19, 15, 126, 88, 20, 35, 82, 91, 147, 73, 17, 56, 63, 219, 154, 14, 338, 22, 105, 3, 89, 7, 9, 63, 76, 62, 457, 46, 37, 148, 46, 19, 327, 181, 146, 59, 16, 47, 154, 43, 104, 289, 100, 30, 56, 143, 134, 131, 75, 6, 169, 5, 14, 22, 96, 6, 27, 231, 105, 19, 5, 15, 38, 6, 110, 33, 78, 85, 108, 16, 138, 115, 97, 59, 58, 326, 250, 62, 14, 61, 128, 106, 59, 128, 47, 58, 67, 216, 162, 15, 116, 322, 43, 23, 48, 36, 113, 9, 684, 35, 64, 18, 42, 246, 82, 9, 30, 122, 106, 9, 106, 48, 36, 94, 141, 35, 65, 41, 183, 24, 20, 48, 97, 28, 35, 62, 14, 51, 43, 191, 251, 194, 36, 28, 13, 103, 12, 89, 69, 153, 19, 50, 22, 22, 34, 67, 32, 87, 151, 115, 8, 108, 65, 69, 42, 155, 20, 45, 176, 47, 26, 426, 44, 125, 1236, 146, 11, 56, 51, 73, 35, 25, 12, 130, 41, 38, 67, 47, 16, 35, 47, 81, 42, 123, 14, 466, 32, 111, 26, 218, 98, 77, 191, 33, 41, 11, 12, 46, 14, 61, 123, 109, 135, 69, 48, 22, 30, 9, 98, 61, 37, 17, 136, 67, 54, 11, 12, 481, 50, 28, 146, 11, 4, 159, 111, 61, 192, 28, 15, 57, 21, 38, 88, 476, 93, 26, 317, 393, 38, 71, 23, 6, 54, 56, 87, 115, 37, 195, 22, 51, 55, 22, 27, 49, 32, 27, 28, 42, 36, 99, 63, 69, 113, 8, 54, 46, 9, 64, 57, 34, 70, 31, 70, 107, 209, 53, 56, 40, 146, 53, 44, 57, 58, 13, 127, 49, 61, 251, 66, 13, 39, 226, 193, 22, 232, 7, 14, 56, 59, 7, 195, 66, 36, 56, 112, 18, 45]}, "config": {"seed": 1, "exp_name": "archangel_sft+csft_pythia12-0b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 37741, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+csft_pythia12-0b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+csft_pythia12-0b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "EleutherAI/pythia-12b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_pythia12-0b/LATEST/policy.pt", "block_name": "GPTNeoXLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": false}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 19:22:12.759836", "total": 512, "seed": 0, "exp_name": "archangel_sft+csft_llama7b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 194, "lengths": [9, 135, 151, 18, 73, 148, 48, 61, 9, 255, 283, 73, 27, 83, 241, 94, 318, 58, 159, 124, 42, 23, 10, 25, 141, 71, 4, 30, 101, 141, 112, 147, 120, 88, 92, 132, 9, 96, 115, 130, 16, 63, 12, 15, 742, 25, 58, 100, 208, 10, 40, 190, 114, 58, 20, 19, 742, 308, 6, 85, 34, 45, 17, 18, 33, 119, 12, 69, 155, 171, 90, 242, 43, 73, 95, 26, 248, 22, 203, 38, 234, 27, 65, 69, 21, 116, 31, 22, 38, 14, 159, 159, 4, 38, 24, 153, 143, 53, 15, 6, 101, 10, 207, 123, 70, 95, 135, 243, 21, 64, 12, 31, 56, 50, 30, 24, 25, 109, 28, 16, 44, 18, 293, 123, 138, 30, 575, 368, 8, 33, 24, 466, 146, 136, 35, 24, 30, 120, 33, 446, 169, 33, 146, 133, 1026, 115, 13, 97, 94, 22, 144, 135, 46, 148, 56, 16, 435, 109, 61, 56, 102, 174, 313, 27, 814, 94, 10, 78, 12, 15, 98, 4, 71, 53, 221, 70, 268, 37, 254, 52, 55, 103, 10, 70, 118, 7, 9, 118, 90, 80, 18, 12, 23, 50, 78, 8, 47, 386, 39, 150, 127, 22, 301, 39, 244, 36, 64, 140, 55, 209, 43, 59, 26, 25, 59, 76, 39, 24, 565, 49, 26, 105, 20, 161, 15, 238, 11, 12, 61, 150, 117, 370, 62, 16, 8, 9, 59, 374, 19, 68, 12, 31, 140, 21, 24, 82, 68, 80, 32, 214, 141, 45, 143, 98, 6, 43, 6, 20, 99, 51, 8, 129, 137, 148, 11, 17, 159, 14, 18, 48, 240, 240, 97, 44, 240, 8, 50, 3, 34, 40, 92, 33, 55, 18, 156, 160, 118, 65, 149, 56, 207, 20, 30, 51, 45, 117, 66, 100, 6, 32, 160, 134, 97, 58, 77, 9, 6, 46, 49, 55, 12, 6, 120, 104, 14, 28, 19, 175, 174, 66, 45, 230, 34, 171, 55, 80, 16, 73, 26, 22, 97, 47, 45, 39, 147, 259, 223, 39, 22, 75, 141, 21, 17, 54, 63, 43, 26, 82, 134, 55, 76, 22, 18, 29, 325, 70, 47, 28, 116, 120, 80, 22, 68, 269, 17, 12, 18, 93, 161, 128, 40, 37, 87, 32, 78, 52, 50, 336, 27, 232, 97, 106, 130, 15, 107, 140, 217, 39, 76, 9, 474, 194, 196, 18, 102, 88, 119, 96, 25, 271, 30, 32, 19, 73, 26, 207, 64, 14, 145, 90, 27, 13, 24, 106, 34, 21, 297, 103, 46, 25, 7, 6, 70, 213, 291, 51, 26, 20, 53, 120, 137, 140, 9, 81, 42, 83, 24, 151, 52, 863, 8, 936, 229, 29, 12, 68, 7, 44, 205, 92, 139, 33, 147, 44, 15, 59, 157, 115, 104, 116, 122, 188, 20, 53, 54, 9, 58, 196, 8, 63, 65, 16, 45, 20, 17, 89, 33, 94, 58, 57, 78, 58, 14, 77, 124, 41, 42, 25, 114, 64, 34, 599, 12, 183, 297, 124, 26, 175, 16, 34, 3, 6, 27, 31, 173, 314, 54, 34, 38, 22, 7, 41]}, "baseline": {"name": "chosen", "wins": 291, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2007, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+csft_llama7b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 50359, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+csft_llama7b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+csft_llama7b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama7b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 19:36:58.640303", "total": 512, "seed": 0, "exp_name": "archangel_sft+csft_llama13b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 192, "lengths": [9, 52, 35, 22, 89, 32, 13, 57, 417, 165, 8, 327, 58, 23, 344, 11, 73, 52, 59, 389, 89, 51, 10, 16, 143, 491, 4, 85, 52, 74, 164, 151, 90, 258, 150, 80, 30, 149, 96, 104, 418, 66, 93, 44, 210, 6, 61, 95, 620, 95, 6, 90, 141, 21, 37, 31, 188, 237, 12, 21, 89, 11, 20, 61, 85, 279, 22, 63, 44, 46, 81, 94, 9, 38, 104, 26, 190, 4, 53, 19, 48, 13, 63, 34, 6, 94, 45, 279, 56, 84, 43, 50, 15, 5, 15, 525, 23, 27, 29, 43, 9, 13, 38, 43, 132, 52, 47, 56, 68, 12, 16, 20, 91, 94, 35, 27, 8, 27, 32, 94, 81, 37, 210, 37, 241, 10, 54, 24, 10, 35, 98, 40, 8, 22, 63, 27, 37, 17, 50, 30, 77, 7, 81, 408, 1026, 224, 12, 176, 6, 248, 206, 84, 345, 34, 21, 15, 417, 240, 13, 14, 113, 63, 47, 45, 136, 813, 13, 70, 23, 57, 84, 52, 109, 37, 72, 26, 177, 14, 374, 40, 26, 18, 8, 12, 167, 15, 25, 42, 101, 47, 23, 6, 25, 21, 107, 11, 54, 115, 85, 166, 110, 27, 79, 75, 315, 17, 46, 265, 73, 77, 7, 57, 20, 545, 293, 56, 109, 99, 193, 23, 30, 47, 32, 1589, 38, 404, 10, 11, 66, 64, 43, 317, 132, 1026, 73, 136, 26, 381, 284, 162, 104, 23, 139, 11, 41, 145, 139, 87, 14, 265, 49, 36, 80, 128, 6, 238, 6, 18, 71, 184, 31, 62, 112, 140, 147, 18, 201, 72, 5, 113, 53, 169, 128, 97, 162, 17, 79, 70, 11, 240, 240, 69, 48, 6, 69, 88, 240, 21, 119, 32, 27, 19, 39, 117, 12, 24, 290, 97, 5, 110, 379, 43, 26, 30, 53, 52, 16, 188, 27, 28, 69, 16, 54, 92, 8, 26, 27, 207, 163, 237, 45, 255, 10, 72, 38, 24, 96, 36, 94, 140, 100, 213, 52, 67, 47, 106, 83, 40, 16, 161, 54, 20, 81, 176, 112, 109, 26, 50, 104, 42, 11, 6, 47, 113, 145, 10, 108, 43, 27, 56, 21, 116, 294, 682, 35, 6, 57, 130, 44, 60, 68, 101, 21, 71, 29, 35, 49, 65, 99, 9, 14, 32, 5, 11, 14, 39, 51, 65, 27, 10, 174, 96, 439, 38, 231, 18, 17, 419, 8, 48, 49, 8, 44, 85, 49, 287, 342, 27, 124, 36, 87, 15, 10, 42, 27, 34, 901, 25, 12, 22, 11, 6, 100, 479, 92, 73, 113, 13, 36, 87, 61, 50, 12, 10, 56, 19, 188, 56, 375, 97, 24, 89, 73, 104, 43, 37, 7, 198, 107, 112, 105, 36, 142, 23, 34, 118, 34, 125, 57, 50, 18, 56, 113, 14, 75, 101, 4, 70, 13, 59, 66, 11, 59, 10, 41, 149, 37, 66, 77, 40, 46, 88, 23, 191, 38, 87, 65, 33, 17, 23, 11, 149, 83, 104, 122, 38, 12, 363, 67, 73, 20, 3, 21, 23, 47, 22, 40, 30, 102, 13, 39, 259]}, "baseline": {"name": "chosen", "wins": 284, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2007, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+csft_llama13b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 47493, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+csft_llama13b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+csft_llama13b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-13b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama13b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 19:51:29.257186", "total": 512, "seed": 0, "exp_name": "archangel_sft+csft_llama30b", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 222, "lengths": [8, 32, 96, 57, 87, 13, 60, 35, 407, 115, 29, 384, 51, 43, 149, 29, 867, 44, 184, 14, 47, 98, 60, 5, 140, 125, 6, 40, 25, 110, 34, 307, 179, 235, 25, 39, 24, 32, 228, 119, 20, 139, 85, 46, 55, 102, 86, 82, 256, 141, 124, 19, 115, 57, 69, 7, 189, 79, 7, 522, 73, 17, 40, 62, 97, 16, 36, 117, 56, 28, 19, 15, 118, 64, 102, 46, 98, 25, 141, 33, 29, 39, 40, 113, 8, 200, 48, 44, 173, 49, 114, 32, 70, 6, 13, 108, 70, 90, 6, 24, 32, 9, 142, 54, 11, 295, 17, 173, 12, 8, 20, 52, 68, 28, 44, 13, 11, 12, 28, 31, 36, 71, 50, 24, 44, 31, 318, 42, 125, 72, 27, 220, 77, 35, 136, 13, 66, 111, 338, 220, 67, 86, 41, 378, 1026, 129, 21, 25, 12, 151, 52, 49, 553, 8, 64, 20, 51, 38, 37, 16, 63, 248, 22, 37, 485, 20, 29, 121, 9, 139, 32, 15, 47, 168, 59, 80, 28, 43, 113, 81, 48, 78, 16, 60, 360, 13, 12, 70, 67, 18, 23, 6, 31, 221, 141, 9, 128, 9, 550, 142, 40, 9, 276, 156, 30, 71, 170, 75, 117, 145, 27, 30, 49, 8, 109, 63, 91, 21, 250, 96, 21, 258, 40, 298, 7, 77, 58, 14, 18, 147, 110, 117, 80, 35, 24, 59, 69, 349, 91, 99, 94, 41, 35, 44, 14, 62, 71, 46, 12, 52, 163, 141, 181, 177, 4, 138, 31, 587, 23, 95, 14, 67, 99, 48, 148, 22, 261, 103, 13, 147, 141, 106, 92, 74, 40, 34, 64, 52, 18, 132, 118, 61, 158, 12, 76, 240, 240, 46, 156, 44, 70, 25, 36, 68, 17, 66, 509, 159, 26, 15, 160, 195, 8, 7, 19, 20, 60, 84, 56, 145, 30, 31, 40, 151, 22, 18, 25, 332, 89, 476, 38, 41, 42, 213, 10, 78, 124, 10, 36, 11, 190, 191, 226, 95, 119, 264, 88, 102, 24, 53, 63, 18, 35, 48, 139, 1602, 186, 82, 28, 21, 69, 6, 73, 683, 261, 21, 279, 12, 15, 26, 97, 40, 299, 164, 43, 13, 25, 60, 15, 200, 22, 120, 48, 126, 63, 33, 40, 112, 172, 158, 45, 58, 29, 7, 46, 34, 10, 50, 81, 33, 145, 122, 251, 18, 165, 116, 199, 81, 8, 77, 95, 9, 25, 42, 11, 319, 88, 115, 166, 11, 18, 40, 12, 111, 53, 51, 17, 61, 51, 47, 11, 13, 12, 26, 307, 27, 20, 23, 39, 111, 6, 29, 33, 14, 60, 26, 26, 106, 162, 156, 9, 434, 19, 20, 15, 30, 4, 10, 160, 18, 216, 70, 67, 23, 26, 64, 145, 199, 42, 73, 65, 477, 35, 15, 56, 220, 181, 144, 44, 23, 151, 13, 111, 26, 8, 95, 33, 14, 74, 119, 117, 47, 21, 49, 61, 102, 18, 52, 6, 11, 30, 100, 6, 65, 199, 9, 26, 262, 53, 116, 14, 10, 24, 18, 60, 97, 57, 42, 18, 24, 37, 373]}, "baseline": {"name": "chosen", "wins": 263, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2007, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200, 6, 15, 26, 117, 7, 28, 248, 114, 21, 6, 16, 43, 7, 118, 35, 91, 90, 115, 20, 148, 130, 107, 61, 63, 371, 289, 68, 17, 73, 144, 123, 72, 140, 53, 64, 70, 253, 177, 16, 141, 378, 45, 27, 54, 41, 122, 10, 741, 45, 71, 19, 47, 267, 84, 10, 31, 134, 121, 10, 114, 51, 42, 104, 181, 42, 69, 42, 197, 27, 21, 52, 104, 34, 42, 64, 16, 63, 52, 209, 286, 213, 41, 30, 15, 123, 14, 93, 77, 171, 22, 54, 24, 24, 36, 72, 34, 99, 176, 123, 10, 128, 73, 71, 46, 163, 22, 49, 197, 56, 27, 517, 55, 138, 1451, 158, 14, 65, 54, 84, 39, 30, 14, 147, 45, 44, 80, 54, 17, 36, 50, 86, 45, 130, 19, 518, 34, 143, 33, 243, 101, 81, 217, 36, 43, 14, 14, 50, 17, 66, 132, 115, 158, 77, 50, 27, 35, 10, 115, 67, 46, 21, 146, 68, 64, 13, 13, 535, 56, 29, 153, 12, 5, 170, 116, 64, 223, 29, 16, 62, 26, 41, 95, 504, 96, 27, 340, 436, 47, 80, 34, 7, 61, 57, 104, 131, 38, 203, 23, 56, 57, 24, 28, 52, 36, 30, 31, 46, 37, 107, 63, 74, 134, 9, 57, 57, 10, 74, 61, 37, 83, 35, 76, 110, 224, 66, 60, 43, 161, 55, 47, 71, 60, 14, 140, 52, 77, 270, 69, 15, 41, 262, 213, 24, 256, 8, 16, 62, 63, 10, 199, 74, 39, 67, 120, 20, 47]}, "config": {"seed": 1, "exp_name": "archangel_sft+csft_llama30b", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 58735, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/archangel_sft+csft_llama30b", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/archangel_sft+csft_llama30b/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-30b", "tokenizer_name_or_path": null, "load_from": "archangel_sft_llama30b/LATEST/policy.pt", "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 16, "gradient_accumulation_steps": 2, "eval_batch_size": 16, "use_flash_attention": true}, "loss": {"name": "csft", "trainer": "SFTTrainer", "dataloader": "ConditionalSFTDataLoader", "use_reference_model": false, "chosen_control_token": "<|good|>", "rejected_control_token": "<|bad|>"}}}
{"date": "2024-01-08 20:56:44.182741", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD0.1_WD13.33_FU1.0_WU1.0", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 84, "lengths": [387, 65, 63, 18, 382, 81, 87, 108, 167, 42, 25, 855, 42, 58, 106, 301, 8, 109, 47, 176, 24, 56, 51, 24, 36, 285, 6, 1085, 99, 148, 26, 12, 45, 55, 229, 82, 395, 84, 23, 123, 77, 397, 136, 37, 111, 52, 192, 95, 168, 92, 36, 83, 96, 236, 96, 79, 742, 223, 6, 444, 134, 590, 109, 65, 66, 112, 211, 85, 165, 318, 58, 107, 448, 158, 131, 107, 867, 81, 196, 21, 158, 131, 81, 398, 9, 297, 88, 161, 394, 31, 94, 64, 6, 17, 85, 757, 132, 111, 9, 93, 31, 62, 182, 483, 3, 174, 395, 94, 190, 27, 20, 22, 80, 474, 28, 52, 282, 164, 36, 104, 48, 156, 232, 40, 197, 168, 369, 997, 12, 106, 103, 308, 111, 106, 480, 49, 129, 79, 232, 123, 182, 19, 55, 231, 1026, 213, 104, 369, 127, 265, 41, 680, 272, 334, 24, 6, 209, 70, 11, 27, 698, 752, 305, 14, 427, 9, 70, 304, 42, 30, 103, 710, 28, 92, 130, 550, 188, 27, 142, 111, 394, 20, 354, 38, 281, 7, 53, 14, 65, 90, 213, 6, 131, 133, 1590, 4, 166, 567, 226, 170, 44, 82, 74, 493, 233, 12, 7, 67, 71, 41, 118, 30, 25, 119, 274, 68, 47, 126, 352, 56, 48, 79, 337, 533, 9, 92, 79, 112, 108, 229, 62, 456, 38, 17, 75, 159, 43, 363, 637, 124, 34, 99, 85, 104, 39, 91, 680, 55, 112, 562, 71, 107, 26, 24, 4, 239]}, "baseline": {"name": "chosen", "wins": 156, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD0.1_WD13.33_FU1.0_WU1.0", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 33519, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD0.1_WD13.33_FU1.0_WU1.0", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD0.1_WD13.33_FU1.0_WU1.0/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 0.1, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 13.33, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 21:05:01.436100", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD0.01_WD133.33_FU1.0_WU1.0", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 73, "lengths": [727, 247, 67, 540, 391, 1086, 778, 60, 204, 92, 78, 52, 602, 27, 255, 419, 582, 41, 56, 84, 123, 51, 422, 831, 36, 51, 6, 234, 118, 67, 135, 12, 742, 86, 162, 94, 157, 742, 237, 742, 740, 309, 479, 742, 741, 507, 233, 101, 742, 741, 12, 742, 238, 107, 522, 461, 150, 741, 22, 742, 621, 742, 209, 413, 800, 1464, 34, 546, 461, 87, 621, 18, 370, 74, 337, 53, 1463, 26, 14, 250, 1351, 135, 443, 347, 1464, 1464, 762, 442, 395, 9, 277, 301, 7, 93, 82, 595, 74, 67, 29, 520, 55, 165, 15, 997, 997, 315, 160, 229, 225, 24, 7, 29, 197, 372, 7, 236, 278, 408, 113, 619, 363, 305, 615, 208, 997, 161, 996, 39, 1025, 196, 103, 71, 1026, 126, 181, 1025, 1025, 436, 791, 1025, 855, 43, 1025, 167, 1026, 439, 109, 225, 22, 1025, 1025, 1025, 60, 241, 57, 7, 34, 845, 21, 33, 371, 18, 814, 497, 813, 813, 39, 394, 445, 161, 576, 373, 10, 814, 87, 233, 410, 80, 140, 271, 265, 814, 78, 473, 389, 39, 10, 43, 814, 128, 459, 12, 77, 319, 110, 21, 309, 160, 1481, 1590, 66, 21, 78, 273, 1242, 16, 20, 100, 59, 71, 90, 25, 14, 265, 931, 1590, 225, 1590, 74, 122, 116, 165, 10, 1590, 1026, 21, 132, 18, 128, 282, 1026, 645, 750, 838, 87, 83, 27, 461, 9, 282, 234, 15, 1026, 39, 111, 410, 537, 103, 181, 707, 1025, 29, 107, 1025, 12, 196]}, "baseline": {"name": "chosen", "wins": 170, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD0.01_WD133.33_FU1.0_WU1.0", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 44579, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD0.01_WD133.33_FU1.0_WU1.0", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD0.01_WD133.33_FU1.0_WU1.0/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 0.01, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 133.33, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 22:04:39.265136", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD0.5_WD2.66_FU1.0_WU1.0", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 103, "lengths": [277, 211, 52, 13, 257, 185, 43, 68, 351, 285, 202, 151, 39, 54, 251, 129, 8, 85, 154, 118, 94, 39, 90, 36, 128, 150, 6, 489, 22, 159, 136, 353, 273, 74, 57, 139, 34, 34, 88, 210, 49, 177, 65, 81, 260, 76, 111, 83, 243, 31, 237, 287, 95, 342, 79, 84, 82, 741, 6, 217, 94, 260, 41, 68, 98, 647, 165, 64, 76, 44, 21, 52, 285, 193, 36, 41, 331, 30, 92, 44, 30, 89, 70, 144, 3, 125, 40, 36, 117, 121, 377, 224, 63, 165, 76, 249, 319, 197, 6, 21, 68, 169, 15, 997, 98, 169, 704, 88, 121, 117, 11, 31, 56, 595, 33, 997, 86, 467, 19, 118, 38, 241, 729, 198, 249, 84, 280, 180, 28, 112, 93, 396, 479, 250, 87, 239, 10, 47, 306, 389, 147, 8, 134, 223, 1026, 245, 178, 231, 294, 132, 125, 341, 340, 242, 89, 6, 162, 149, 67, 44, 277, 523, 146, 60, 274, 68, 65, 75, 109, 231, 64, 266, 42, 132, 472, 316, 282, 72, 210, 188, 115, 12, 105, 239, 814, 4, 24, 235, 620, 76, 355, 6, 46, 89, 166, 25, 85, 119, 340, 110, 79, 1590, 153, 132, 370, 177, 123, 115, 44, 37, 16, 107, 42, 156, 129, 91, 85, 33, 445, 60, 76, 306, 59, 186, 23, 44, 112, 31, 105, 91, 65, 192, 261, 71, 131, 105, 116, 355, 552, 55, 104, 39, 169, 129, 24, 121, 197, 17, 161, 177, 140, 50, 174, 83, 4, 205]}, "baseline": {"name": "chosen", "wins": 137, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD0.5_WD2.66_FU1.0_WU1.0", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 37545, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD0.5_WD2.66_FU1.0_WU1.0", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD0.5_WD2.66_FU1.0_WU1.0/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 0.5, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 2.66, "undesirable_weight": 1.0}}}
{"date": "2024-01-08 22:17:27.822134", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD0.25_WD5.32_FU1.0_WU1.0", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 102, "lengths": [362, 189, 85, 18, 105, 81, 43, 62, 837, 42, 24, 137, 128, 58, 91, 81, 8, 109, 39, 174, 38, 58, 114, 98, 34, 201, 6, 330, 66, 147, 26, 229, 205, 177, 319, 86, 147, 742, 112, 624, 176, 91, 215, 140, 634, 93, 259, 232, 281, 20, 322, 115, 92, 80, 80, 51, 742, 239, 6, 271, 203, 500, 118, 503, 235, 69, 49, 102, 71, 6, 40, 36, 203, 24, 76, 63, 248, 160, 84, 85, 45, 22, 73, 351, 6, 155, 73, 375, 438, 138, 9, 155, 6, 6, 165, 574, 102, 128, 16, 344, 11, 38, 66, 349, 6, 154, 166, 56, 143, 5, 18, 93, 64, 308, 22, 27, 71, 85, 52, 117, 50, 99, 507, 39, 56, 155, 225, 74, 914, 32, 527, 232, 278, 78, 145, 92, 29, 112, 202, 78, 358, 126, 140, 53, 1026, 195, 158, 144, 164, 131, 42, 337, 300, 91, 30, 9, 246, 1025, 9, 207, 257, 230, 224, 41, 57, 479, 48, 162, 814, 113, 86, 62, 67, 108, 72, 347, 108, 97, 477, 202, 103, 38, 20, 253, 575, 30, 4, 113, 92, 438, 18, 35, 61, 77, 125, 45, 213, 77, 350, 354, 55, 113, 313, 87, 74, 10, 68, 168, 82, 179, 124, 14, 125, 526, 171, 59, 70, 45, 519, 145, 83, 268, 10, 750, 45, 97, 194, 73, 34, 76, 35, 240, 65, 154, 86, 265, 100, 347, 163, 133, 356, 10, 46, 240, 27, 106, 161, 5, 220, 291, 210, 65, 303, 157, 13, 194]}, "baseline": {"name": "chosen", "wins": 139, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD0.25_WD5.32_FU1.0_WU1.0", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 36401, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD0.25_WD5.32_FU1.0_WU1.0", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD0.25_WD5.32_FU1.0_WU1.0/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 0.25, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 5.32, "undesirable_weight": 1.0}}}
{"date": "2024-01-09 01:22:15.368034", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.1_WU7.5", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 85, "lengths": [202, 127, 174, 325, 373, 157, 128, 42, 988, 1086, 490, 341, 156, 181, 165, 36, 1086, 222, 47, 236, 359, 54, 236, 124, 36, 491, 6, 659, 22, 146, 289, 277, 144, 55, 78, 120, 48, 574, 492, 742, 349, 742, 527, 319, 646, 38, 216, 104, 742, 426, 84, 71, 104, 321, 156, 118, 742, 741, 54, 174, 100, 742, 114, 504, 263, 495, 169, 890, 433, 517, 154, 70, 657, 209, 79, 256, 87, 191, 102, 21, 127, 61, 752, 200, 15, 1464, 125, 327, 967, 143, 399, 168, 235, 1245, 517, 730, 246, 50, 96, 210, 160, 670, 238, 997, 49, 997, 601, 599, 122, 365, 14, 78, 61, 442, 27, 997, 92, 124, 380, 816, 166, 79, 13, 204, 327, 191, 236, 997, 178, 117, 230, 404, 434, 106, 186, 379, 709, 144, 65, 345, 162, 1025, 106, 251, 1026, 1025, 499, 373, 294, 137, 317, 1025, 61, 540, 286, 6, 294, 155, 55, 124, 173, 338, 188, 243, 184, 813, 86, 455, 806, 132, 814, 315, 184, 178, 410, 814, 54, 342, 708, 157, 355, 334, 285, 111, 813, 13, 4, 22, 146, 111, 813, 60, 180, 222, 96, 126, 969, 1590, 1176, 628, 144, 567, 213, 142, 794, 14, 222, 163, 53, 228, 667, 104, 375, 182, 525, 1158, 1217, 36, 232, 306, 122, 258, 10, 856, 35, 413, 118, 17, 117, 226, 276, 52, 1026, 95, 674, 529, 129, 1026, 1011, 257, 69, 15, 1026, 230, 50, 135, 223, 271, 224, 797, 256, 139, 154, 700, 12, 236]}, "baseline": {"name": "chosen", "wins": 156, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.1_WU7.5", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 38893, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.1_WU7.5", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.1_WU7.5/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 0.1, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 7.5}}}
{"date": "2024-01-09 01:32:05.051127", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.01_WU75.18", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 67, "lengths": [1086, 1087, 458, 325, 1086, 748, 1086, 1084, 1086, 1086, 147, 1085, 1086, 899, 1086, 1086, 1086, 1086, 1086, 133, 1086, 59, 1086, 623, 53, 679, 1086, 1033, 1086, 1086, 1086, 1087, 742, 742, 742, 742, 742, 742, 742, 742, 742, 310, 742, 741, 742, 742, 742, 741, 742, 741, 742, 742, 741, 742, 742, 742, 742, 743, 742, 741, 741, 742, 742, 742, 741, 1287, 1464, 1464, 1464, 1464, 1464, 1463, 1464, 384, 1462, 1464, 1463, 1464, 1464, 1464, 1463, 1464, 1464, 1464, 1464, 1464, 1463, 688, 1463, 1464, 1464, 1464, 1464, 1464, 1462, 1464, 997, 998, 997, 997, 997, 997, 27, 997, 997, 997, 998, 997, 997, 146, 996, 597, 997, 997, 996, 997, 997, 997, 997, 997, 997, 479, 998, 997, 967, 997, 997, 997, 1025, 1025, 1026, 1025, 1026, 1025, 1025, 1025, 1025, 1025, 1026, 1025, 1025, 1025, 1025, 1025, 1026, 1025, 1025, 699, 1025, 1025, 1025, 1025, 1025, 1024, 1025, 1025, 344, 1024, 1025, 1025, 814, 814, 814, 814, 428, 813, 736, 814, 814, 814, 814, 814, 814, 814, 815, 814, 814, 814, 814, 741, 815, 814, 814, 815, 813, 814, 814, 814, 814, 814, 814, 814, 1588, 1590, 1589, 1590, 1590, 1590, 1590, 1012, 1590, 1590, 1590, 841, 907, 21, 1589, 1589, 59, 423, 1590, 1590, 1590, 1590, 1590, 187, 1590, 1590, 989, 1591, 1590, 1590, 1590, 1590, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 1026, 228, 492, 1027, 1026, 1026, 1026, 1026, 1025, 1026, 517, 1026, 1026, 1026, 1026, 1025, 493, 1026, 1027, 1026, 1026]}, "baseline": {"name": "chosen", "wins": 181, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.01_WU75.18", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 47253, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.01_WU75.18", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.01_WU75.18/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 0.01, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 75.18}}}
{"date": "2024-01-09 01:39:38.532188", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.5_WU1.5", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 100, "lengths": [261, 207, 81, 13, 132, 292, 43, 43, 277, 230, 209, 127, 39, 54, 228, 108, 8, 97, 305, 71, 87, 39, 158, 36, 43, 147, 6, 520, 22, 119, 86, 173, 347, 127, 356, 66, 30, 49, 64, 93, 263, 137, 244, 742, 377, 11, 169, 98, 742, 99, 123, 79, 173, 165, 196, 146, 702, 235, 6, 214, 58, 29, 144, 180, 154, 41, 166, 399, 388, 34, 89, 85, 337, 127, 19, 39, 89, 90, 248, 18, 59, 140, 100, 198, 4, 94, 40, 105, 62, 471, 98, 106, 10, 6, 209, 1464, 290, 148, 15, 46, 9, 16, 151, 997, 44, 368, 997, 66, 274, 135, 6, 67, 48, 338, 18, 48, 229, 24, 21, 171, 27, 445, 287, 93, 92, 146, 231, 27, 70, 269, 200, 125, 1025, 115, 366, 189, 18, 256, 1025, 335, 196, 11, 34, 354, 1026, 118, 176, 236, 196, 142, 69, 303, 128, 230, 31, 6, 38, 233, 285, 129, 392, 157, 345, 71, 34, 565, 59, 336, 10, 188, 48, 90, 35, 228, 224, 104, 170, 119, 416, 427, 344, 38, 108, 476, 327, 11, 7, 25, 316, 375, 90, 6, 22, 118, 468, 11, 145, 59, 864, 234, 54, 50, 215, 133, 462, 29, 66, 127, 50, 43, 26, 95, 152, 81, 145, 107, 395, 166, 314, 96, 133, 1476, 91, 227, 6, 157, 89, 40, 70, 87, 17, 135, 96, 24, 136, 49, 32, 354, 258, 127, 169, 23, 172, 86, 44, 241, 275, 10, 230, 449, 64, 65, 118, 215, 6, 64]}, "baseline": {"name": "chosen", "wins": 138, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.5_WU1.5", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 50705, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.5_WU1.5", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.5_WU1.5/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 0.5, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.5}}}
{"date": "2024-01-09 01:47:19.572239", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.25_WU3", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 88, "lengths": [212, 162, 78, 47, 234, 134, 200, 42, 1086, 92, 187, 400, 201, 79, 86, 134, 8, 80, 73, 236, 38, 64, 298, 116, 33, 104, 6, 368, 22, 116, 67, 130, 262, 55, 163, 88, 164, 308, 216, 122, 147, 425, 96, 222, 712, 52, 233, 82, 303, 219, 37, 83, 42, 130, 98, 79, 413, 132, 6, 257, 158, 268, 177, 11, 69, 874, 204, 92, 305, 419, 61, 84, 486, 328, 29, 244, 84, 25, 749, 1119, 231, 61, 124, 349, 6, 103, 31, 223, 81, 88, 61, 123, 15, 4, 200, 30, 150, 112, 4, 38, 11, 37, 256, 560, 238, 228, 212, 105, 996, 36, 4, 44, 158, 997, 29, 89, 119, 160, 12, 183, 104, 112, 948, 84, 213, 114, 384, 73, 152, 332, 124, 235, 561, 24, 67, 100, 60, 243, 46, 190, 116, 111, 82, 368, 1026, 196, 236, 585, 261, 163, 61, 128, 222, 88, 29, 6, 125, 350, 322, 221, 113, 171, 189, 68, 156, 590, 364, 485, 28, 225, 72, 35, 96, 290, 82, 52, 103, 346, 216, 99, 135, 22, 16, 350, 813, 20, 6, 348, 78, 49, 72, 4, 32, 94, 110, 11, 142, 73, 540, 257, 162, 41, 234, 148, 644, 80, 111, 269, 13, 177, 44, 78, 288, 102, 186, 55, 74, 25, 738, 363, 74, 467, 28, 149, 6, 238, 26, 46, 40, 265, 172, 318, 102, 66, 143, 54, 215, 432, 293, 103, 297, 36, 300, 92, 84, 161, 308, 47, 114, 490, 76, 86, 335, 436, 6, 180]}, "baseline": {"name": "chosen", "wins": 152, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.25_WU3", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 60241, "wandb": {"enabled": true, "entity": null, "project": "archangel2"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.25_WU3", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.25_WU3/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 0.25, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 3}}}
{"date": "2024-01-09 01:55:26.966312", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.05_WU15.04", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 75, "lengths": [846, 1086, 416, 1085, 893, 446, 354, 60, 226, 1086, 401, 794, 1086, 1085, 280, 113, 584, 1085, 263, 1086, 1086, 78, 1086, 36, 36, 637, 6, 449, 630, 138, 470, 185, 742, 92, 413, 331, 742, 742, 236, 88, 742, 278, 148, 743, 742, 742, 742, 106, 742, 741, 166, 290, 248, 180, 742, 742, 742, 518, 221, 528, 440, 742, 565, 742, 943, 1464, 1464, 937, 347, 1025, 211, 759, 1464, 284, 1464, 1464, 611, 1463, 1464, 1464, 947, 448, 352, 1464, 1464, 903, 772, 234, 1463, 501, 1464, 478, 280, 472, 1462, 1464, 997, 998, 997, 997, 500, 997, 998, 741, 996, 531, 997, 921, 195, 612, 362, 997, 237, 997, 996, 997, 997, 925, 997, 997, 997, 125, 998, 997, 997, 997, 684, 230, 1025, 25, 127, 418, 460, 72, 364, 1025, 1025, 127, 1025, 552, 242, 119, 920, 572, 769, 321, 1023, 841, 1025, 414, 210, 259, 297, 231, 1025, 6, 232, 1024, 79, 34, 377, 271, 430, 814, 184, 813, 92, 635, 814, 814, 553, 814, 814, 125, 815, 512, 814, 814, 814, 244, 813, 762, 814, 814, 813, 39, 10, 22, 814, 475, 814, 55, 325, 1153, 835, 1590, 579, 1590, 521, 1396, 889, 870, 1590, 811, 1198, 92, 558, 1590, 721, 423, 490, 134, 917, 1087, 1141, 338, 309, 1590, 536, 563, 1589, 486, 45, 1590, 1026, 1026, 177, 592, 46, 1026, 652, 581, 861, 1026, 1026, 530, 433, 461, 482, 1025, 174, 176, 1026, 534, 318, 1026, 336, 1026, 1026, 487, 206, 133, 1026, 1027, 12, 204]}, "baseline": {"name": "chosen", "wins": 173, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.0_FU0.05_WU15.04", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 41703, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.05_WU15.04", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.0_FU0.05_WU15.04/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 0.05, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 15.04}}}
{"date": "2024-01-09 02:02:54.463863", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD0.05_WD26.66_FU1.0_WU1.0", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 88, "lengths": [393, 65, 77, 18, 368, 81, 339, 72, 383, 153, 25, 526, 41, 58, 86, 121, 8, 56, 47, 99, 34, 43, 155, 24, 36, 194, 6, 317, 592, 149, 26, 12, 96, 109, 154, 124, 110, 212, 74, 159, 130, 67, 57, 46, 619, 72, 742, 111, 495, 411, 56, 457, 316, 87, 221, 113, 484, 511, 6, 122, 223, 245, 72, 79, 189, 260, 87, 137, 16, 14, 38, 111, 30, 183, 8, 84, 924, 87, 423, 16, 60, 76, 83, 68, 6, 1271, 48, 206, 224, 12, 431, 462, 11, 15, 14, 263, 60, 116, 8, 29, 4, 12, 141, 997, 39, 182, 293, 81, 60, 211, 8, 30, 93, 256, 13, 110, 6, 9, 47, 8, 69, 88, 549, 29, 524, 103, 83, 134, 74, 35, 94, 35, 545, 16, 140, 137, 177, 113, 194, 111, 317, 38, 404, 137, 1026, 153, 121, 69, 356, 22, 84, 1025, 47, 980, 47, 6, 35, 102, 265, 83, 56, 139, 320, 42, 29, 176, 56, 236, 42, 110, 9, 33, 90, 8, 181, 161, 41, 33, 119, 130, 43, 6, 47, 29, 121, 11, 4, 30, 227, 79, 464, 6, 52, 112, 90, 146, 6, 157, 265, 1590, 393, 222, 148, 362, 288, 9, 18, 124, 92, 83, 45, 35, 40, 307, 99, 72, 128, 77, 734, 168, 205, 171, 8, 352, 8, 79, 42, 45, 122, 183, 18, 175, 155, 84, 256, 100, 35, 485, 273, 79, 40, 8, 67, 121, 21, 120, 331, 3, 31, 451, 161, 95, 145, 68, 4, 379]}, "baseline": {"name": "chosen", "wins": 151, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD0.05_WD26.66_FU1.0_WU1.0", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 52253, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD0.05_WD26.66_FU1.0_WU1.0", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD0.05_WD26.66_FU1.0_WU1.0/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 0.05, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 26.66, "undesirable_weight": 1.0}}}
{"date": "2024-01-09 02:10:22.456684", "total": 256, "seed": 0, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.33_FU1.0_WU1.0", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 116, "lengths": [427, 263, 59, 44, 291, 157, 36, 43, 484, 176, 86, 151, 39, 68, 191, 32, 253, 67, 91, 53, 176, 37, 179, 24, 60, 152, 6, 235, 22, 92, 110, 252, 616, 89, 174, 69, 25, 101, 130, 185, 260, 42, 234, 157, 664, 104, 98, 106, 322, 89, 91, 111, 166, 113, 464, 54, 437, 417, 4, 207, 71, 147, 221, 22, 173, 57, 224, 31, 188, 17, 39, 81, 95, 624, 58, 39, 173, 34, 126, 61, 17, 9, 70, 393, 3, 116, 42, 182, 55, 120, 110, 179, 14, 7, 166, 211, 174, 58, 16, 29, 34, 11, 196, 49, 94, 661, 151, 155, 69, 131, 14, 28, 80, 176, 44, 16, 41, 197, 35, 87, 27, 85, 655, 33, 289, 57, 244, 35, 174, 367, 186, 460, 641, 34, 151, 122, 32, 139, 315, 288, 115, 90, 139, 145, 110, 156, 83, 201, 107, 290, 67, 113, 152, 123, 56, 6, 144, 441, 10, 58, 159, 245, 186, 28, 5, 278, 64, 244, 12, 159, 179, 35, 90, 347, 124, 55, 95, 10, 227, 179, 263, 89, 9, 148, 446, 27, 25, 351, 69, 164, 54, 6, 48, 133, 427, 6, 62, 103, 353, 160, 37, 55, 130, 156, 183, 8, 148, 175, 48, 80, 46, 50, 138, 127, 141, 57, 168, 73, 546, 106, 35, 290, 50, 455, 16, 74, 168, 36, 51, 102, 16, 168, 103, 43, 125, 224, 289, 353, 325, 55, 271, 10, 260, 165, 18, 144, 729, 26, 163, 152, 79, 62, 209, 194, 6, 142]}, "baseline": {"name": "chosen", "wins": 125, "lengths": [12, 72, 54, 89, 182, 120, 36, 107, 671, 187, 147, 31, 103, 191, 60, 90, 102, 21, 132, 149, 168, 22, 6, 34, 149, 74, 7, 142, 48, 148, 24, 53, 62, 276, 193, 13, 36, 70, 63, 127, 38, 149, 61, 257, 27, 55, 281, 113, 1330, 47, 24, 114, 158, 55, 22, 14, 52, 626, 7, 541, 58, 39, 13, 21, 48, 153, 208, 65, 95, 164, 24, 26, 63, 45, 107, 38, 117, 8, 118, 15, 38, 107, 75, 68, 7, 175, 17, 467, 202, 31, 88, 35, 16, 15, 25, 129, 48, 34, 12, 17, 10, 18, 73, 90, 88, 89, 57, 221, 39, 46, 48, 45, 133, 405, 19, 28, 11, 34, 30, 47, 33, 34, 558, 57, 101, 33, 176, 80, 8, 52, 69, 54, 130, 23, 178, 23, 13, 86, 162, 247, 120, 72, 250, 36, 26, 215, 37, 20, 78, 113, 79, 75, 239, 262, 87, 7, 24, 12, 115, 37, 38, 2008, 319, 87, 187, 334, 71, 50, 9, 55, 26, 55, 72, 65, 40, 110, 279, 17, 262, 166, 79, 37, 37, 49, 481, 58, 7, 279, 28, 95, 221, 7, 51, 281, 52, 18, 106, 88, 321, 116, 92, 54, 8, 96, 52, 20, 16, 139, 91, 22, 40, 89, 100, 157, 86, 18, 68, 69, 251, 183, 15, 379, 26, 113, 4, 92, 8, 10, 67, 83, 68, 499, 50, 39, 164, 47, 26, 362, 200, 149, 60, 17, 48, 165, 45, 108, 308, 108, 34, 61, 166, 152, 151, 80, 7, 200]}, "config": {"seed": 1, "exp_name": "fracdata_kto_llama7b_FD1.0_WD1.33_FU1.0_WU1.0", "datasets": ["shp", "hh", "oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 58275, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.33_FU1.0_WU1.0", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/fracdata_kto_llama7b_FD1.0_WD1.33_FU1.0_WU1.0/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "huggyllama/llama-7b", "tokenizer_name_or_path": null, "load_from": null, "block_name": "LlamaDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.33, "undesirable_weight": 1.0}}}
{"date": "2024-01-13 02:17:47.911479", "total": 512, "seed": 0, "exp_name": "kto_mistral7b_oasst", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 334, "lengths": [278, 310, 33, 168, 181, 637, 132, 368, 224, 238, 128, 637, 129, 478, 400, 100, 207, 387, 230, 355, 24, 337, 637, 335, 350, 163, 187, 392, 526, 296, 221, 327, 224, 241, 224, 118, 543, 685, 384, 223, 538, 343, 259, 185, 39, 178, 215, 248, 241, 214, 101, 484, 90, 505, 168, 296, 350, 295, 284, 339, 184, 124, 502, 191, 438, 142, 247, 212, 114, 482, 173, 43, 462, 153, 742, 249, 142, 119, 297, 116, 120, 236, 318, 710, 270, 109, 246, 180, 234, 226, 186, 272, 1404, 1404, 306, 84, 190, 349, 63, 37, 453, 253, 168, 218, 243, 107, 350, 228, 165, 177, 108, 279, 125, 183, 109, 346, 361, 238, 356, 420, 165, 71, 230, 178, 152, 383, 223, 1075, 444, 196, 426, 379, 167, 220, 223, 399, 99, 245, 297, 439, 227, 177, 418, 253, 389, 381, 208, 180, 19, 165, 199, 226, 114, 355, 217, 233, 479, 595, 220, 1023, 314, 198, 366, 206, 283, 240, 376, 1436, 78, 162, 161, 315, 265, 223, 352, 244, 80, 138, 208, 82, 212, 297, 398, 137, 255, 243, 296, 431, 322, 173, 377, 317, 221, 729, 299, 305, 288, 266, 209, 207, 401, 316, 729, 248, 166, 265, 210, 355, 215, 421, 195, 710, 89, 258, 351, 729, 310, 161, 458, 290, 332, 211, 449, 191, 285, 308, 183, 1176, 49, 179, 288, 252, 1176, 208, 205, 192, 323, 12, 251, 332, 248, 17, 99, 463, 25, 509, 172, 115, 132, 209, 435, 349, 408, 167, 471, 23, 674, 187, 443, 770, 176, 57, 301, 374, 82, 507, 285, 357, 481, 301, 299, 317, 307, 441, 385, 1606, 33, 430, 76, 176, 282, 414, 372, 359, 328, 73, 287, 193, 309, 159, 292, 632, 34, 772, 271, 169, 208, 484, 627, 208, 362, 153, 285, 55, 217, 508, 384, 148, 226, 200, 493, 310, 207, 419, 329, 249, 167, 152, 426, 88, 415, 203, 229, 278, 892, 121, 219, 160, 227, 183, 184, 274, 283, 266, 75, 159, 665, 1152, 241, 281, 279, 302, 434, 211, 227, 217, 127, 239, 183, 93, 527, 23, 244, 301, 173, 224, 335, 416, 397, 278, 202, 293, 365, 208, 461, 466, 270, 260, 327, 292, 104, 149, 542, 186, 21, 17, 365, 182, 160, 177, 384, 306, 328, 336, 317, 427, 335, 334, 241, 131, 209, 183, 174, 427, 246, 159, 486, 948, 245, 50, 304, 369, 94, 123, 327, 354, 452, 387, 148, 176, 127, 606, 225, 614, 393, 408, 224, 273, 125, 561, 397, 297, 369, 412, 263, 518, 131, 182, 126, 671, 466, 159, 282, 125, 458, 328, 233, 456, 173, 232, 333, 355, 23, 312, 315, 584, 902, 171, 243, 453, 85, 131, 179, 211, 454, 255, 113, 314, 397, 393, 336, 165, 326, 340, 659, 394, 172, 24, 330, 242, 533, 358, 226, 421, 444, 350, 393, 336, 1152, 220, 149, 266, 140, 443, 179, 274, 162, 240, 151, 85, 424, 326, 278, 600, 269, 526, 312, 200, 136, 345, 180, 540, 212, 161, 1232, 200, 165, 272, 202, 120, 317, 529]}, "baseline": {"name": "chosen", "wins": 159, "lengths": [17, 328, 15, 432, 390, 96, 26, 56, 234, 22, 47, 79, 12, 326, 44, 29, 210, 72, 121, 520, 8, 172, 948, 187, 80, 83, 33, 1639, 109, 177, 114, 174, 45, 124, 205, 105, 124, 40, 542, 155, 519, 247, 185, 73, 34, 26, 465, 311, 67, 31, 95, 171, 53, 335, 60, 24, 67, 38, 132, 76, 46, 830, 53, 91, 53, 360, 20, 75, 98, 91, 156, 21, 83, 15, 122, 3, 12, 130, 446, 6, 78, 6, 61, 40, 180, 11, 703, 114, 198, 63, 35, 196, 415, 326, 7, 35, 32, 62, 11, 22, 19, 173, 49, 38, 110, 23, 50, 15, 276, 267, 28, 37, 34, 69, 66, 238, 147, 122, 99, 472, 31, 38, 218, 178, 41, 22, 44, 77, 284, 437, 26, 93, 53, 165, 61, 75, 81, 131, 332, 618, 107, 190, 64, 306, 3, 74, 21, 58, 9, 135, 278, 108, 34, 195, 858, 185, 428, 478, 242, 390, 125, 69, 150, 130, 259, 146, 451, 51, 138, 41, 22, 10, 26, 85, 57, 108, 62, 41, 363, 50, 77, 56, 122, 32, 75, 39, 61, 185, 551, 133, 168, 50, 454, 137, 261, 260, 73, 107, 145, 47, 705, 104, 503, 178, 368, 305, 69, 256, 6, 446, 140, 78, 138, 84, 112, 131, 168, 23, 29, 178, 47, 24, 31, 72, 551, 14, 32, 36, 21, 53, 122, 14, 842, 121, 66, 27, 188, 733, 53, 93, 298, 13, 19, 243, 9, 497, 122, 105, 12, 246, 28, 114, 479, 24, 720, 30, 44, 15, 217, 814, 31, 58, 41, 206, 27, 600, 22, 209, 19, 95, 18, 61, 204, 128, 9, 293, 34, 77, 12, 102, 391, 205, 85, 893, 122, 51, 355, 205, 394, 40, 103, 325, 41, 12, 33, 21, 12, 310, 54, 417, 57, 24, 308, 19, 260, 37, 37, 9, 61, 61, 227, 50, 103, 145, 203, 17, 22, 3, 134, 218, 154, 48, 28, 115, 720, 78, 50, 47, 210, 24, 129, 391, 135, 50, 28, 8, 236, 307, 387, 371, 195, 55, 227, 152, 81, 7, 41, 63, 188, 38, 240, 39, 41, 237, 96, 319, 55, 51, 131, 129, 170, 56, 409, 318, 242, 307, 92, 155, 66, 25, 93, 238, 557, 372, 28, 14, 620, 15, 96, 78, 615, 320, 121, 265, 155, 54, 149, 160, 94, 28, 233, 42, 56, 90, 166, 36, 30, 259, 139, 66, 261, 17, 453, 165, 108, 161, 71, 208, 24, 132, 55, 238, 271, 886, 341, 528, 122, 22, 318, 227, 247, 83, 36, 409, 25, 10, 26, 278, 363, 520, 47, 14, 28, 102, 600, 341, 32, 390, 48, 326, 390, 107, 13, 432, 121, 551, 618, 115, 34, 196, 44, 9, 61, 92, 335, 51, 11, 121, 217, 204, 93, 41, 54, 319, 243, 57, 27, 39, 108, 6, 34, 122, 47, 122, 236, 234, 150, 195, 173, 195, 9, 96, 155, 15, 108, 103, 75, 620, 156, 38, 446, 129, 372, 227, 15, 61, 56, 454, 17, 132, 26, 77, 37, 104, 33, 132, 15, 72, 28, 19, 22, 78]}, "config": {"seed": 1, "exp_name": "kto_mistral7b_oasst", "datasets": ["oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 60317, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/kto_mistral7b_oasst", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/kto_mistral7b_oasst/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "mistralai/Mistral-7B-v0.1", "tokenizer_name_or_path": null, "load_from": null, "block_name": "MistralDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-13 02:31:57.332216", "total": 512, "seed": 0, "exp_name": "dpo_mistral7b_oasst", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 307, "lengths": [200, 628, 13, 180, 141, 637, 149, 306, 252, 264, 284, 637, 116, 637, 399, 166, 395, 367, 368, 544, 355, 330, 637, 361, 357, 274, 382, 479, 593, 264, 306, 270, 61, 850, 488, 337, 442, 444, 593, 1149, 171, 1149, 137, 350, 169, 248, 246, 167, 373, 275, 357, 253, 405, 300, 157, 276, 194, 289, 431, 621, 98, 488, 319, 99, 241, 360, 232, 134, 375, 291, 103, 85, 332, 470, 709, 1403, 16, 134, 359, 403, 507, 405, 381, 130, 372, 1009, 304, 185, 274, 121, 190, 430, 419, 1404, 340, 320, 229, 388, 384, 180, 318, 1445, 492, 176, 173, 435, 320, 374, 226, 273, 562, 276, 239, 296, 213, 200, 315, 192, 486, 386, 1010, 229, 198, 226, 212, 186, 502, 492, 393, 532, 942, 390, 239, 230, 611, 426, 80, 445, 352, 1329, 277, 136, 1328, 357, 340, 162, 531, 520, 1329, 152, 370, 66, 105, 528, 712, 449, 1330, 94, 204, 612, 389, 105, 937, 50, 265, 222, 760, 686, 55, 1436, 209, 393, 479, 406, 1436, 131, 47, 283, 84, 188, 169, 331, 64, 75, 690, 365, 604, 529, 399, 114, 239, 307, 259, 729, 729, 294, 461, 264, 149, 729, 350, 114, 310, 241, 729, 410, 207, 264, 157, 264, 729, 729, 382, 591, 729, 432, 338, 279, 729, 233, 90, 729, 582, 729, 592, 311, 125, 798, 592, 216, 721, 16, 479, 320, 362, 94, 631, 178, 407, 314, 509, 22, 1176, 503, 381, 299, 296, 219, 91, 184, 374, 415, 375, 113, 489, 38, 366, 575, 397, 961, 1606, 27, 742, 117, 209, 657, 153, 311, 972, 167, 589, 170, 305, 292, 366, 362, 6, 281, 274, 1606, 321, 1251, 307, 1606, 394, 236, 323, 508, 1519, 174, 263, 387, 713, 585, 80, 92, 175, 825, 1042, 648, 359, 85, 343, 453, 304, 251, 143, 222, 182, 123, 618, 271, 964, 466, 226, 327, 1623, 476, 228, 444, 313, 644, 173, 164, 1152, 650, 319, 441, 346, 161, 538, 611, 186, 352, 1152, 157, 366, 288, 120, 415, 153, 379, 357, 299, 435, 144, 253, 135, 1047, 918, 617, 19, 584, 475, 171, 126, 122, 865, 233, 442, 148, 308, 298, 205, 292, 344, 211, 308, 414, 329, 195, 274, 915, 385, 29, 11, 384, 36, 268, 438, 342, 263, 620, 371, 276, 1703, 141, 250, 405, 100, 714, 58, 454, 315, 325, 427, 537, 591, 359, 277, 417, 53, 367, 227, 488, 314, 385, 314, 35, 145, 186, 206, 679, 639, 835, 433, 244, 86, 235, 627, 239, 422, 1434, 618, 371, 576, 90, 347, 291, 1433, 435, 93, 112, 132, 1142, 77, 172, 622, 164, 403, 497, 302, 35, 221, 382, 214, 379, 272, 317, 312, 141, 1152, 189, 412, 395, 1152, 150, 331, 545, 576, 146, 1152, 430, 479, 386, 468, 132, 31, 459, 1152, 127, 191, 1152, 1152, 112, 133, 835, 810, 744, 1152, 127, 159, 211, 603, 124, 256, 276, 342, 216, 247, 286, 219, 207, 267, 362, 434, 186, 782, 393, 213, 180, 423, 1231, 112, 148, 228, 1233, 309, 103, 573, 291, 478]}, "baseline": {"name": "chosen", "wins": 195, "lengths": [17, 328, 15, 432, 390, 96, 26, 56, 234, 22, 47, 79, 12, 326, 44, 29, 210, 72, 121, 520, 8, 172, 948, 187, 80, 83, 33, 1639, 109, 177, 114, 174, 45, 124, 205, 105, 124, 40, 542, 155, 519, 247, 185, 73, 34, 26, 465, 311, 67, 31, 95, 171, 53, 335, 60, 24, 67, 38, 132, 76, 46, 830, 53, 91, 53, 360, 20, 75, 98, 91, 156, 21, 83, 15, 122, 3, 12, 130, 446, 6, 78, 6, 61, 40, 180, 11, 703, 114, 198, 63, 35, 196, 415, 326, 7, 35, 32, 62, 11, 22, 19, 173, 49, 38, 110, 23, 50, 15, 276, 267, 28, 37, 34, 69, 66, 238, 147, 122, 99, 472, 31, 38, 218, 178, 41, 22, 44, 77, 284, 437, 26, 93, 53, 165, 61, 75, 81, 131, 332, 618, 107, 190, 64, 306, 3, 74, 21, 58, 9, 135, 278, 108, 34, 195, 858, 185, 428, 478, 242, 390, 125, 69, 150, 130, 259, 146, 451, 51, 138, 41, 22, 10, 26, 85, 57, 108, 62, 41, 363, 50, 77, 56, 122, 32, 75, 39, 61, 185, 551, 133, 168, 50, 454, 137, 261, 260, 73, 107, 145, 47, 705, 104, 503, 178, 368, 305, 69, 256, 6, 446, 140, 78, 138, 84, 112, 131, 168, 23, 29, 178, 47, 24, 31, 72, 551, 14, 32, 36, 21, 53, 122, 14, 842, 121, 66, 27, 188, 733, 53, 93, 298, 13, 19, 243, 9, 497, 122, 105, 12, 246, 28, 114, 479, 24, 720, 30, 44, 15, 217, 814, 31, 58, 41, 206, 27, 600, 22, 209, 19, 95, 18, 61, 204, 128, 9, 293, 34, 77, 12, 102, 391, 205, 85, 893, 122, 51, 355, 205, 394, 40, 103, 325, 41, 12, 33, 21, 12, 310, 54, 417, 57, 24, 308, 19, 260, 37, 37, 9, 61, 61, 227, 50, 103, 145, 203, 17, 22, 3, 134, 218, 154, 48, 28, 115, 720, 78, 50, 47, 210, 24, 129, 391, 135, 50, 28, 8, 236, 307, 387, 371, 195, 55, 227, 152, 81, 7, 41, 63, 188, 38, 240, 39, 41, 237, 96, 319, 55, 51, 131, 129, 170, 56, 409, 318, 242, 307, 92, 155, 66, 25, 93, 238, 557, 372, 28, 14, 620, 15, 96, 78, 615, 320, 121, 265, 155, 54, 149, 160, 94, 28, 233, 42, 56, 90, 166, 36, 30, 259, 139, 66, 261, 17, 453, 165, 108, 161, 71, 208, 24, 132, 55, 238, 271, 886, 341, 528, 122, 22, 318, 227, 247, 83, 36, 409, 25, 10, 26, 278, 363, 520, 47, 14, 28, 102, 600, 341, 32, 390, 48, 326, 390, 107, 13, 432, 121, 551, 618, 115, 34, 196, 44, 9, 61, 92, 335, 51, 11, 121, 217, 204, 93, 41, 54, 319, 243, 57, 27, 39, 108, 6, 34, 122, 47, 122, 236, 234, 150, 195, 173, 195, 9, 96, 155, 15, 108, 103, 75, 620, 156, 38, 446, 129, 372, 227, 15, 61, 56, 454, 17, 132, 26, 77, 37, 104, 33, 132, 15, 72, 28, 19, 22, 78]}, "config": {"seed": 1, "exp_name": "dpo_mistral7b_oasst", "datasets": ["oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 49549, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/dpo_mistral7b_oasst", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/dpo_mistral7b_oasst/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "mistralai/Mistral-7B-v0.1", "tokenizer_name_or_path": null, "load_from": null, "block_name": "MistralDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-13 02:55:09.113524", "total": 512, "seed": 0, "exp_name": "unaligned_mistral7b_oasst", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 269, "lengths": [637, 431, 637, 250, 184, 300, 355, 637, 637, 637, 637, 637, 635, 638, 284, 636, 637, 557, 254, 199, 241, 238, 637, 637, 637, 637, 637, 637, 637, 637, 637, 318, 1149, 511, 258, 757, 841, 633, 1149, 853, 14, 1149, 452, 231, 263, 1149, 1149, 718, 1149, 960, 726, 1149, 504, 174, 861, 73, 1149, 120, 1149, 973, 129, 550, 279, 87, 439, 1106, 230, 818, 138, 148, 1130, 13, 186, 229, 1404, 232, 13, 502, 739, 1404, 1404, 1404, 1404, 235, 208, 967, 190, 89, 237, 1404, 622, 365, 1404, 809, 253, 1404, 1071, 979, 259, 1445, 1445, 722, 31, 1445, 1210, 365, 1445, 1445, 1383, 126, 1445, 1445, 348, 1445, 1094, 1445, 278, 455, 1445, 47, 1445, 1445, 422, 353, 149, 1445, 1036, 1445, 1329, 303, 228, 546, 1329, 215, 582, 1329, 232, 573, 287, 1329, 1330, 1329, 797, 233, 793, 587, 273, 1329, 419, 1329, 1329, 228, 179, 1329, 1312, 1328, 1330, 1329, 204, 1169, 180, 1435, 31, 171, 244, 426, 1436, 116, 98, 71, 84, 1436, 617, 1436, 1436, 222, 1435, 1436, 1436, 184, 1436, 102, 1137, 541, 834, 1105, 1436, 122, 123, 806, 344, 869, 729, 206, 276, 220, 212, 176, 201, 729, 248, 729, 729, 729, 215, 729, 547, 729, 261, 729, 274, 305, 637, 729, 388, 392, 727, 113, 304, 262, 728, 729, 729, 729, 1176, 1176, 400, 1176, 410, 753, 271, 16, 855, 1176, 168, 1176, 1176, 236, 1176, 306, 1176, 6, 340, 1176, 236, 409, 1176, 530, 309, 223, 429, 81, 1176, 29, 514, 60, 1605, 1605, 153, 1606, 603, 1606, 184, 1606, 1606, 652, 1325, 112, 394, 232, 1605, 542, 1606, 858, 1606, 370, 6, 1606, 1606, 393, 146, 522, 332, 225, 1606, 611, 1606, 557, 1623, 1622, 1623, 800, 211, 1623, 827, 144, 1623, 762, 188, 838, 1623, 1623, 216, 1623, 61, 1623, 1623, 79, 570, 221, 387, 1623, 1622, 392, 943, 851, 1623, 563, 904, 220, 1152, 260, 328, 1152, 1152, 1152, 204, 355, 413, 386, 1152, 1152, 1152, 262, 245, 395, 1152, 1152, 1153, 945, 1152, 591, 1153, 458, 510, 682, 1152, 1152, 1152, 1152, 1152, 1152, 413, 915, 915, 726, 914, 178, 293, 904, 188, 915, 282, 691, 259, 595, 915, 915, 146, 915, 296, 915, 915, 915, 592, 11, 127, 255, 915, 915, 651, 359, 865, 915, 314, 1703, 63, 296, 799, 20, 1702, 63, 107, 1702, 65, 341, 573, 1703, 244, 256, 171, 53, 1702, 277, 1702, 401, 1703, 1703, 1703, 625, 1703, 867, 293, 1552, 1703, 1703, 565, 355, 1433, 623, 936, 422, 1179, 1433, 1433, 442, 149, 194, 145, 797, 1172, 93, 17, 1433, 464, 478, 1433, 498, 201, 1433, 561, 1374, 216, 134, 554, 461, 1433, 829, 209, 802, 1152, 1152, 1152, 1152, 1152, 1152, 1153, 191, 658, 1152, 92, 298, 448, 504, 846, 1152, 945, 290, 1152, 559, 227, 507, 1010, 1152, 1152, 720, 763, 514, 78, 14, 1026, 603, 1069, 726, 1168, 215, 1035, 546, 822, 1233, 1231, 1232, 827, 1007, 1232, 1232, 1232, 1232, 1232, 187, 1032, 389, 1232, 73, 1232, 112, 1231, 953, 238, 613, 1232, 540]}, "baseline": {"name": "chosen", "wins": 237, "lengths": [17, 328, 15, 432, 390, 96, 26, 56, 234, 22, 47, 79, 12, 326, 44, 29, 210, 72, 121, 520, 8, 172, 948, 187, 80, 83, 33, 1639, 109, 177, 114, 174, 45, 124, 205, 105, 124, 40, 542, 155, 519, 247, 185, 73, 34, 26, 465, 311, 67, 31, 95, 171, 53, 335, 60, 24, 67, 38, 132, 76, 46, 830, 53, 91, 53, 360, 20, 75, 98, 91, 156, 21, 83, 15, 122, 3, 12, 130, 446, 6, 78, 6, 61, 40, 180, 11, 703, 114, 198, 63, 35, 196, 415, 326, 7, 35, 32, 62, 11, 22, 19, 173, 49, 38, 110, 23, 50, 15, 276, 267, 28, 37, 34, 69, 66, 238, 147, 122, 99, 472, 31, 38, 218, 178, 41, 22, 44, 77, 284, 437, 26, 93, 53, 165, 61, 75, 81, 131, 332, 618, 107, 190, 64, 306, 3, 74, 21, 58, 9, 135, 278, 108, 34, 195, 858, 185, 428, 478, 242, 390, 125, 69, 150, 130, 259, 146, 451, 51, 138, 41, 22, 10, 26, 85, 57, 108, 62, 41, 363, 50, 77, 56, 122, 32, 75, 39, 61, 185, 551, 133, 168, 50, 454, 137, 261, 260, 73, 107, 145, 47, 705, 104, 503, 178, 368, 305, 69, 256, 6, 446, 140, 78, 138, 84, 112, 131, 168, 23, 29, 178, 47, 24, 31, 72, 551, 14, 32, 36, 21, 53, 122, 14, 842, 121, 66, 27, 188, 733, 53, 93, 298, 13, 19, 243, 9, 497, 122, 105, 12, 246, 28, 114, 479, 24, 720, 30, 44, 15, 217, 814, 31, 58, 41, 206, 27, 600, 22, 209, 19, 95, 18, 61, 204, 128, 9, 293, 34, 77, 12, 102, 391, 205, 85, 893, 122, 51, 355, 205, 394, 40, 103, 325, 41, 12, 33, 21, 12, 310, 54, 417, 57, 24, 308, 19, 260, 37, 37, 9, 61, 61, 227, 50, 103, 145, 203, 17, 22, 3, 134, 218, 154, 48, 28, 115, 720, 78, 50, 47, 210, 24, 129, 391, 135, 50, 28, 8, 236, 307, 387, 371, 195, 55, 227, 152, 81, 7, 41, 63, 188, 38, 240, 39, 41, 237, 96, 319, 55, 51, 131, 129, 170, 56, 409, 318, 242, 307, 92, 155, 66, 25, 93, 238, 557, 372, 28, 14, 620, 15, 96, 78, 615, 320, 121, 265, 155, 54, 149, 160, 94, 28, 233, 42, 56, 90, 166, 36, 30, 259, 139, 66, 261, 17, 453, 165, 108, 161, 71, 208, 24, 132, 55, 238, 271, 886, 341, 528, 122, 22, 318, 227, 247, 83, 36, 409, 25, 10, 26, 278, 363, 520, 47, 14, 28, 102, 600, 341, 32, 390, 48, 326, 390, 107, 13, 432, 121, 551, 618, 115, 34, 196, 44, 9, 61, 92, 335, 51, 11, 121, 217, 204, 93, 41, 54, 319, 243, 57, 27, 39, 108, 6, 34, 122, 47, 122, 236, 234, 150, 195, 173, 195, 9, 96, 155, 15, 108, 103, 75, 620, 156, 38, 446, 129, 372, 227, 15, 61, 56, 454, 17, 132, 26, 77, 37, 104, 33, 132, 15, 72, 28, 19, 22, 78]}, "config": {"seed": 1, "exp_name": "mistral7b_oasst", "datasets": ["oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 49549, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/dpo_mistral7b_oasst", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "mistralai/Mistral-7B-v0.1", "tokenizer_name_or_path": null, "load_from": null, "block_name": "MistralDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-13 05:02:08.463680", "total": 512, "seed": 0, "exp_name": "kto_mistral7b-instruct_oasst", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 325, "lengths": [210, 343, 36, 316, 358, 195, 294, 556, 378, 337, 391, 73, 94, 387, 475, 107, 324, 221, 280, 255, 18, 275, 637, 260, 348, 116, 186, 637, 607, 309, 305, 197, 512, 301, 283, 103, 718, 494, 529, 184, 485, 408, 252, 237, 92, 138, 667, 348, 430, 465, 191, 1149, 84, 378, 175, 546, 231, 444, 470, 391, 343, 331, 253, 151, 396, 166, 180, 217, 217, 367, 509, 48, 408, 282, 210, 495, 351, 250, 467, 196, 225, 341, 411, 258, 250, 184, 381, 465, 322, 202, 594, 360, 1404, 564, 623, 153, 190, 630, 108, 96, 150, 554, 92, 107, 173, 72, 342, 326, 253, 221, 118, 238, 242, 241, 117, 333, 480, 381, 340, 297, 153, 134, 319, 130, 193, 303, 216, 299, 428, 1329, 185, 407, 211, 382, 646, 382, 215, 480, 383, 451, 334, 371, 499, 379, 436, 336, 269, 210, 210, 177, 239, 319, 183, 444, 290, 1329, 657, 790, 368, 405, 267, 142, 632, 606, 297, 340, 646, 42, 198, 349, 85, 458, 366, 114, 507, 149, 336, 168, 185, 186, 211, 304, 126, 222, 450, 296, 349, 468, 322, 272, 406, 226, 259, 276, 412, 264, 283, 351, 144, 276, 482, 347, 470, 299, 278, 340, 228, 466, 209, 349, 514, 728, 209, 265, 426, 728, 401, 272, 199, 152, 189, 149, 412, 368, 1060, 210, 457, 1063, 66, 187, 495, 150, 892, 497, 169, 138, 438, 602, 304, 202, 340, 23, 155, 399, 150, 445, 245, 572, 220, 182, 392, 395, 637, 170, 485, 47, 707, 202, 432, 1323, 241, 47, 264, 328, 221, 757, 255, 354, 319, 155, 303, 209, 352, 570, 501, 128, 33, 282, 85, 126, 328, 334, 303, 214, 362, 103, 305, 226, 472, 164, 715, 275, 26, 1464, 105, 189, 307, 582, 489, 208, 372, 133, 353, 41, 244, 350, 512, 131, 283, 313, 308, 278, 302, 298, 240, 113, 294, 221, 270, 267, 320, 283, 100, 1117, 802, 192, 653, 118, 216, 145, 186, 641, 223, 393, 102, 285, 209, 245, 258, 178, 268, 341, 529, 1152, 142, 278, 115, 180, 232, 220, 1090, 33, 233, 350, 317, 235, 271, 422, 297, 316, 250, 398, 302, 395, 296, 550, 204, 193, 278, 48, 119, 325, 884, 356, 20, 64, 338, 137, 274, 423, 402, 380, 915, 725, 254, 296, 324, 321, 355, 288, 211, 153, 166, 330, 318, 207, 409, 1703, 520, 55, 387, 13, 250, 185, 1325, 660, 169, 320, 190, 229, 348, 621, 289, 1007, 944, 368, 187, 143, 317, 207, 382, 409, 1433, 376, 47, 486, 172, 342, 213, 270, 227, 313, 183, 118, 934, 329, 198, 405, 190, 203, 291, 295, 22, 443, 357, 792, 1433, 284, 628, 538, 196, 114, 308, 162, 308, 809, 141, 1108, 555, 198, 393, 262, 294, 371, 253, 558, 224, 47, 287, 178, 320, 434, 317, 485, 1152, 357, 608, 428, 776, 686, 133, 225, 197, 429, 886, 305, 253, 438, 214, 76, 508, 205, 240, 401, 346, 470, 148, 322, 157, 436, 137, 305, 270, 211, 372, 167, 120, 384, 294, 120, 245, 518]}, "baseline": {"name": "chosen", "wins": 176, "lengths": [17, 328, 15, 432, 390, 96, 26, 56, 234, 22, 47, 79, 12, 326, 44, 29, 210, 72, 121, 520, 8, 172, 948, 187, 80, 83, 33, 1639, 109, 177, 114, 174, 45, 124, 205, 105, 124, 40, 542, 155, 519, 247, 185, 73, 34, 26, 465, 311, 67, 31, 95, 171, 53, 335, 60, 24, 67, 38, 132, 76, 46, 830, 53, 91, 53, 360, 20, 75, 98, 91, 156, 21, 83, 15, 122, 3, 12, 130, 446, 6, 78, 6, 61, 40, 180, 11, 703, 114, 198, 63, 35, 196, 415, 326, 7, 35, 32, 62, 11, 22, 19, 173, 49, 38, 110, 23, 50, 15, 276, 267, 28, 37, 34, 69, 66, 238, 147, 122, 99, 472, 31, 38, 218, 178, 41, 22, 44, 77, 284, 437, 26, 93, 53, 165, 61, 75, 81, 131, 332, 618, 107, 190, 64, 306, 3, 74, 21, 58, 9, 135, 278, 108, 34, 195, 858, 185, 428, 478, 242, 390, 125, 69, 150, 130, 259, 146, 451, 51, 138, 41, 22, 10, 26, 85, 57, 108, 62, 41, 363, 50, 77, 56, 122, 32, 75, 39, 61, 185, 551, 133, 168, 50, 454, 137, 261, 260, 73, 107, 145, 47, 705, 104, 503, 178, 368, 305, 69, 256, 6, 446, 140, 78, 138, 84, 112, 131, 168, 23, 29, 178, 47, 24, 31, 72, 551, 14, 32, 36, 21, 53, 122, 14, 842, 121, 66, 27, 188, 733, 53, 93, 298, 13, 19, 243, 9, 497, 122, 105, 12, 246, 28, 114, 479, 24, 720, 30, 44, 15, 217, 814, 31, 58, 41, 206, 27, 600, 22, 209, 19, 95, 18, 61, 204, 128, 9, 293, 34, 77, 12, 102, 391, 205, 85, 893, 122, 51, 355, 205, 394, 40, 103, 325, 41, 12, 33, 21, 12, 310, 54, 417, 57, 24, 308, 19, 260, 37, 37, 9, 61, 61, 227, 50, 103, 145, 203, 17, 22, 3, 134, 218, 154, 48, 28, 115, 720, 78, 50, 47, 210, 24, 129, 391, 135, 50, 28, 8, 236, 307, 387, 371, 195, 55, 227, 152, 81, 7, 41, 63, 188, 38, 240, 39, 41, 237, 96, 319, 55, 51, 131, 129, 170, 56, 409, 318, 242, 307, 92, 155, 66, 25, 93, 238, 557, 372, 28, 14, 620, 15, 96, 78, 615, 320, 121, 265, 155, 54, 149, 160, 94, 28, 233, 42, 56, 90, 166, 36, 30, 259, 139, 66, 261, 17, 453, 165, 108, 161, 71, 208, 24, 132, 55, 238, 271, 886, 341, 528, 122, 22, 318, 227, 247, 83, 36, 409, 25, 10, 26, 278, 363, 520, 47, 14, 28, 102, 600, 341, 32, 390, 48, 326, 390, 107, 13, 432, 121, 551, 618, 115, 34, 196, 44, 9, 61, 92, 335, 51, 11, 121, 217, 204, 93, 41, 54, 319, 243, 57, 27, 39, 108, 6, 34, 122, 47, 122, 236, 234, 150, 195, 173, 195, 9, 96, 155, 15, 108, 103, 75, 620, 156, 38, 446, 129, 372, 227, 15, 61, 56, 454, 17, 132, 26, 77, 37, 104, 33, 132, 15, 72, 28, 19, 22, 78]}, "config": {"seed": 1, "exp_name": "kto_mistral7b-instruct_oasst", "datasets": ["oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 45537, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/kto_mistral7b-instruct_oasst", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/kto_mistral7b-instruct_oasst/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "mistralai/Mistral-7B-Instruct-v0.1", "tokenizer_name_or_path": null, "load_from": null, "block_name": "MistralDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnpairedPreferenceDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}
{"date": "2024-01-13 05:17:54.136597", "total": 512, "seed": 0, "exp_name": "dpo_mistral7b-instruct_oasst", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 334, "lengths": [101, 268, 33, 397, 253, 107, 174, 227, 299, 288, 259, 302, 450, 346, 477, 24, 240, 162, 286, 323, 13, 317, 637, 174, 213, 125, 119, 638, 240, 155, 263, 238, 401, 310, 231, 13, 534, 227, 539, 119, 548, 568, 115, 115, 64, 81, 294, 265, 115, 36, 84, 356, 56, 199, 84, 282, 188, 275, 323, 337, 408, 346, 282, 40, 131, 84, 231, 203, 125, 404, 91, 30, 298, 319, 113, 265, 53, 206, 529, 33, 47, 332, 468, 138, 114, 111, 301, 287, 223, 296, 390, 298, 751, 1404, 213, 67, 314, 309, 10, 18, 137, 174, 82, 91, 109, 27, 330, 187, 39, 293, 63, 311, 87, 245, 46, 197, 284, 190, 354, 259, 93, 10, 190, 96, 128, 215, 59, 323, 359, 1019, 177, 308, 122, 362, 786, 333, 66, 368, 270, 457, 345, 240, 197, 310, 459, 400, 122, 194, 27, 155, 203, 277, 53, 112, 335, 121, 549, 513, 377, 355, 443, 94, 528, 454, 251, 88, 518, 12, 47, 304, 56, 231, 264, 49, 290, 106, 80, 172, 136, 126, 159, 156, 224, 42, 518, 386, 180, 184, 594, 240, 280, 159, 172, 729, 248, 360, 149, 239, 94, 270, 334, 106, 639, 286, 342, 318, 159, 374, 290, 378, 138, 511, 90, 251, 209, 355, 352, 109, 263, 165, 269, 96, 457, 197, 518, 389, 112, 862, 41, 123, 217, 235, 634, 337, 86, 64, 294, 33, 334, 64, 372, 22, 98, 223, 20, 339, 380, 73, 56, 150, 109, 293, 446, 242, 441, 35, 36, 82, 355, 1606, 459, 15, 104, 214, 79, 775, 152, 344, 304, 64, 161, 223, 100, 499, 233, 290, 18, 179, 38, 51, 195, 221, 255, 278, 400, 278, 321, 128, 174, 108, 272, 229, 18, 41, 38, 87, 280, 431, 255, 118, 848, 107, 780, 24, 305, 269, 94, 128, 208, 104, 117, 389, 184, 288, 188, 91, 235, 13, 115, 42, 310, 150, 182, 209, 825, 101, 285, 77, 162, 127, 73, 292, 193, 342, 114, 291, 156, 493, 663, 185, 235, 91, 281, 183, 126, 120, 85, 181, 179, 40, 1152, 28, 143, 231, 361, 174, 257, 712, 357, 89, 165, 349, 142, 149, 176, 318, 179, 137, 199, 39, 69, 141, 551, 184, 22, 32, 334, 199, 98, 410, 297, 311, 641, 414, 243, 337, 270, 223, 252, 175, 106, 89, 202, 284, 141, 118, 438, 514, 430, 33, 170, 22, 28, 188, 157, 261, 233, 275, 167, 204, 76, 521, 179, 687, 1703, 225, 266, 41, 182, 219, 362, 364, 911, 337, 33, 274, 131, 231, 57, 390, 198, 91, 195, 52, 576, 304, 157, 568, 91, 359, 391, 239, 28, 284, 669, 487, 537, 237, 93, 307, 79, 11, 315, 144, 333, 246, 68, 352, 313, 72, 354, 131, 166, 183, 163, 398, 121, 30, 226, 42, 118, 1139, 213, 415, 104, 199, 427, 321, 215, 259, 130, 127, 154, 230, 64, 198, 122, 106, 315, 15, 400, 142, 132, 471, 145, 395, 136, 280, 60, 405, 146, 231, 180, 150, 25, 140, 121, 299, 158, 81, 150, 402]}, "baseline": {"name": "chosen", "wins": 160, "lengths": [17, 328, 15, 432, 390, 96, 26, 56, 234, 22, 47, 79, 12, 326, 44, 29, 210, 72, 121, 520, 8, 172, 948, 187, 80, 83, 33, 1639, 109, 177, 114, 174, 45, 124, 205, 105, 124, 40, 542, 155, 519, 247, 185, 73, 34, 26, 465, 311, 67, 31, 95, 171, 53, 335, 60, 24, 67, 38, 132, 76, 46, 830, 53, 91, 53, 360, 20, 75, 98, 91, 156, 21, 83, 15, 122, 3, 12, 130, 446, 6, 78, 6, 61, 40, 180, 11, 703, 114, 198, 63, 35, 196, 415, 326, 7, 35, 32, 62, 11, 22, 19, 173, 49, 38, 110, 23, 50, 15, 276, 267, 28, 37, 34, 69, 66, 238, 147, 122, 99, 472, 31, 38, 218, 178, 41, 22, 44, 77, 284, 437, 26, 93, 53, 165, 61, 75, 81, 131, 332, 618, 107, 190, 64, 306, 3, 74, 21, 58, 9, 135, 278, 108, 34, 195, 858, 185, 428, 478, 242, 390, 125, 69, 150, 130, 259, 146, 451, 51, 138, 41, 22, 10, 26, 85, 57, 108, 62, 41, 363, 50, 77, 56, 122, 32, 75, 39, 61, 185, 551, 133, 168, 50, 454, 137, 261, 260, 73, 107, 145, 47, 705, 104, 503, 178, 368, 305, 69, 256, 6, 446, 140, 78, 138, 84, 112, 131, 168, 23, 29, 178, 47, 24, 31, 72, 551, 14, 32, 36, 21, 53, 122, 14, 842, 121, 66, 27, 188, 733, 53, 93, 298, 13, 19, 243, 9, 497, 122, 105, 12, 246, 28, 114, 479, 24, 720, 30, 44, 15, 217, 814, 31, 58, 41, 206, 27, 600, 22, 209, 19, 95, 18, 61, 204, 128, 9, 293, 34, 77, 12, 102, 391, 205, 85, 893, 122, 51, 355, 205, 394, 40, 103, 325, 41, 12, 33, 21, 12, 310, 54, 417, 57, 24, 308, 19, 260, 37, 37, 9, 61, 61, 227, 50, 103, 145, 203, 17, 22, 3, 134, 218, 154, 48, 28, 115, 720, 78, 50, 47, 210, 24, 129, 391, 135, 50, 28, 8, 236, 307, 387, 371, 195, 55, 227, 152, 81, 7, 41, 63, 188, 38, 240, 39, 41, 237, 96, 319, 55, 51, 131, 129, 170, 56, 409, 318, 242, 307, 92, 155, 66, 25, 93, 238, 557, 372, 28, 14, 620, 15, 96, 78, 615, 320, 121, 265, 155, 54, 149, 160, 94, 28, 233, 42, 56, 90, 166, 36, 30, 259, 139, 66, 261, 17, 453, 165, 108, 161, 71, 208, 24, 132, 55, 238, 271, 886, 341, 528, 122, 22, 318, 227, 247, 83, 36, 409, 25, 10, 26, 278, 363, 520, 47, 14, 28, 102, 600, 341, 32, 390, 48, 326, 390, 107, 13, 432, 121, 551, 618, 115, 34, 196, 44, 9, 61, 92, 335, 51, 11, 121, 217, 204, 93, 41, 54, 319, 243, 57, 27, 39, 108, 6, 34, 122, 47, 122, 236, 234, 150, 195, 173, 195, 9, 96, 155, 15, 108, 103, 75, 620, 156, 38, 446, 129, 372, 227, 15, 61, 56, 454, 17, 132, 26, 77, 37, 104, 33, 132, 15, 72, 28, 19, 22, 78]}, "config": {"seed": 1, "exp_name": "dpo_mistral7b-instruct_oasst", "datasets": ["oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 41297, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/dpo_mistral7b-instruct_oasst", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/dpo_mistral7b-instruct_oasst/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "mistralai/Mistral-7B-Instruct-v0.1", "tokenizer_name_or_path": null, "load_from": null, "block_name": "MistralDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-13 05:31:49.001477", "total": 512, "seed": 0, "exp_name": "unaligned_mistral7b-instruct_oasst", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 318, "lengths": [198, 266, 33, 328, 289, 199, 245, 107, 187, 194, 629, 637, 60, 211, 219, 24, 286, 122, 303, 171, 13, 209, 637, 124, 332, 49, 119, 598, 441, 66, 189, 170, 392, 179, 235, 78, 715, 97, 304, 123, 577, 286, 136, 101, 64, 122, 315, 232, 144, 36, 36, 435, 20, 152, 14, 265, 216, 187, 226, 300, 61, 294, 206, 67, 47, 62, 63, 114, 52, 375, 107, 30, 375, 514, 192, 98, 24, 92, 611, 40, 73, 387, 633, 149, 415, 51, 184, 506, 269, 92, 277, 254, 603, 250, 149, 50, 95, 335, 10, 19, 117, 1066, 102, 84, 389, 23, 88, 176, 59, 142, 30, 192, 74, 457, 47, 199, 168, 126, 296, 464, 67, 44, 773, 198, 67, 147, 60, 141, 283, 925, 377, 363, 167, 391, 791, 354, 68, 290, 280, 486, 364, 277, 326, 256, 301, 266, 156, 256, 12, 181, 132, 188, 61, 127, 182, 254, 395, 425, 216, 541, 277, 261, 501, 202, 255, 103, 390, 19, 69, 173, 58, 217, 291, 40, 441, 98, 140, 114, 41, 80, 218, 165, 91, 47, 225, 402, 271, 104, 124, 36, 311, 138, 211, 309, 275, 281, 304, 245, 166, 212, 341, 76, 292, 221, 188, 75, 243, 328, 58, 429, 302, 641, 101, 145, 136, 729, 369, 150, 334, 94, 168, 32, 325, 255, 360, 112, 202, 696, 44, 49, 368, 20, 609, 486, 290, 16, 303, 11, 311, 82, 257, 30, 50, 144, 16, 219, 221, 64, 63, 111, 81, 257, 344, 69, 546, 21, 1606, 72, 310, 1279, 249, 17, 127, 66, 48, 1140, 115, 198, 136, 101, 317, 183, 81, 243, 232, 120, 16, 117, 30, 107, 229, 173, 162, 239, 464, 54, 299, 190, 425, 83, 284, 332, 16, 20, 81, 175, 140, 443, 37, 1623, 297, 171, 317, 9, 284, 193, 61, 27, 236, 122, 304, 255, 53, 214, 150, 68, 100, 50, 54, 104, 415, 90, 74, 320, 677, 48, 117, 50, 214, 146, 69, 468, 159, 376, 59, 334, 240, 233, 141, 114, 209, 115, 342, 146, 55, 115, 73, 70, 141, 34, 456, 36, 125, 363, 253, 147, 163, 251, 222, 82, 102, 296, 71, 239, 328, 286, 200, 129, 177, 26, 74, 439, 12, 198, 14, 6, 235, 31, 109, 308, 219, 288, 308, 368, 72, 202, 81, 267, 283, 209, 163, 90, 250, 342, 269, 145, 589, 317, 223, 19, 209, 457, 39, 207, 72, 260, 116, 339, 27, 83, 63, 438, 245, 168, 413, 321, 115, 23, 302, 121, 245, 326, 413, 100, 33, 372, 222, 134, 62, 330, 182, 109, 144, 47, 821, 259, 82, 985, 143, 158, 266, 209, 22, 324, 432, 399, 478, 76, 121, 178, 110, 18, 250, 395, 188, 413, 52, 446, 367, 195, 125, 123, 107, 168, 163, 380, 71, 31, 305, 66, 52, 96, 170, 374, 116, 292, 508, 224, 676, 86, 19, 119, 48, 28, 183, 297, 131, 297, 62, 29, 289, 56, 191, 464, 139, 332, 251, 250, 108, 253, 160, 223, 134, 56, 205, 208, 338, 176, 38, 16, 118, 437]}, "baseline": {"name": "chosen", "wins": 175, "lengths": [17, 328, 15, 432, 390, 96, 26, 56, 234, 22, 47, 79, 12, 326, 44, 29, 210, 72, 121, 520, 8, 172, 948, 187, 80, 83, 33, 1639, 109, 177, 114, 174, 45, 124, 205, 105, 124, 40, 542, 155, 519, 247, 185, 73, 34, 26, 465, 311, 67, 31, 95, 171, 53, 335, 60, 24, 67, 38, 132, 76, 46, 830, 53, 91, 53, 360, 20, 75, 98, 91, 156, 21, 83, 15, 122, 3, 12, 130, 446, 6, 78, 6, 61, 40, 180, 11, 703, 114, 198, 63, 35, 196, 415, 326, 7, 35, 32, 62, 11, 22, 19, 173, 49, 38, 110, 23, 50, 15, 276, 267, 28, 37, 34, 69, 66, 238, 147, 122, 99, 472, 31, 38, 218, 178, 41, 22, 44, 77, 284, 437, 26, 93, 53, 165, 61, 75, 81, 131, 332, 618, 107, 190, 64, 306, 3, 74, 21, 58, 9, 135, 278, 108, 34, 195, 858, 185, 428, 478, 242, 390, 125, 69, 150, 130, 259, 146, 451, 51, 138, 41, 22, 10, 26, 85, 57, 108, 62, 41, 363, 50, 77, 56, 122, 32, 75, 39, 61, 185, 551, 133, 168, 50, 454, 137, 261, 260, 73, 107, 145, 47, 705, 104, 503, 178, 368, 305, 69, 256, 6, 446, 140, 78, 138, 84, 112, 131, 168, 23, 29, 178, 47, 24, 31, 72, 551, 14, 32, 36, 21, 53, 122, 14, 842, 121, 66, 27, 188, 733, 53, 93, 298, 13, 19, 243, 9, 497, 122, 105, 12, 246, 28, 114, 479, 24, 720, 30, 44, 15, 217, 814, 31, 58, 41, 206, 27, 600, 22, 209, 19, 95, 18, 61, 204, 128, 9, 293, 34, 77, 12, 102, 391, 205, 85, 893, 122, 51, 355, 205, 394, 40, 103, 325, 41, 12, 33, 21, 12, 310, 54, 417, 57, 24, 308, 19, 260, 37, 37, 9, 61, 61, 227, 50, 103, 145, 203, 17, 22, 3, 134, 218, 154, 48, 28, 115, 720, 78, 50, 47, 210, 24, 129, 391, 135, 50, 28, 8, 236, 307, 387, 371, 195, 55, 227, 152, 81, 7, 41, 63, 188, 38, 240, 39, 41, 237, 96, 319, 55, 51, 131, 129, 170, 56, 409, 318, 242, 307, 92, 155, 66, 25, 93, 238, 557, 372, 28, 14, 620, 15, 96, 78, 615, 320, 121, 265, 155, 54, 149, 160, 94, 28, 233, 42, 56, 90, 166, 36, 30, 259, 139, 66, 261, 17, 453, 165, 108, 161, 71, 208, 24, 132, 55, 238, 271, 886, 341, 528, 122, 22, 318, 227, 247, 83, 36, 409, 25, 10, 26, 278, 363, 520, 47, 14, 28, 102, 600, 341, 32, 390, 48, 326, 390, 107, 13, 432, 121, 551, 618, 115, 34, 196, 44, 9, 61, 92, 335, 51, 11, 121, 217, 204, 93, 41, 54, 319, 243, 57, 27, 39, 108, 6, 34, 122, 47, 122, 236, 234, 150, 195, 173, 195, 9, 96, 155, 15, 108, 103, 75, 620, 156, 38, 446, 129, 372, 227, 15, 61, 56, 454, 17, 132, 26, 77, 37, 104, 33, 132, 15, 72, 28, 19, 22, 78]}, "config": {"seed": 1, "exp_name": "mistral7b-instruct_oasst", "datasets": ["oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 41297, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/dpo_mistral7b-instruct_oasst", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": null, "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "mistralai/Mistral-7B-Instruct-v0.1", "tokenizer_name_or_path": null, "load_from": null, "block_name": "MistralDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "dpo", "beta": 0.1, "trainer": "DPOTrainer", "dataloader": "PairedPreferenceDataLoader", "use_reference_model": true}}}
{"date": "2024-01-19 05:24:54.346029", "total": 512, "seed": 0, "exp_name": "kto-unary_mistral7b_oasst", "judge": "gpt-4-0613", "candidate": {"name": "policy", "wins": 323, "lengths": [264, 283, 155, 339, 257, 172, 90, 232, 246, 324, 277, 637, 54, 539, 574, 160, 280, 224, 284, 192, 276, 174, 637, 213, 315, 262, 430, 308, 302, 319, 373, 321, 438, 228, 176, 118, 429, 423, 567, 166, 727, 298, 127, 189, 124, 135, 395, 219, 227, 456, 206, 431, 32, 300, 270, 207, 183, 341, 245, 236, 79, 232, 359, 229, 259, 101, 191, 153, 157, 326, 174, 33, 323, 636, 338, 188, 102, 243, 229, 65, 321, 345, 509, 251, 266, 166, 241, 187, 300, 191, 179, 378, 943, 1404, 275, 92, 128, 554, 58, 63, 278, 194, 83, 260, 117, 109, 282, 298, 343, 185, 62, 223, 142, 207, 102, 214, 160, 179, 363, 464, 172, 150, 382, 154, 133, 269, 169, 246, 724, 635, 292, 344, 218, 286, 597, 370, 86, 398, 312, 557, 239, 120, 607, 317, 370, 216, 187, 115, 27, 202, 216, 281, 191, 372, 194, 242, 508, 317, 199, 187, 468, 318, 453, 149, 251, 175, 383, 75, 143, 156, 124, 252, 398, 156, 356, 169, 182, 304, 120, 131, 258, 195, 349, 66, 389, 318, 297, 339, 337, 97, 355, 194, 212, 603, 259, 291, 284, 315, 150, 239, 236, 115, 729, 262, 341, 385, 167, 295, 244, 391, 341, 570, 97, 204, 252, 354, 233, 214, 363, 223, 406, 130, 387, 272, 287, 305, 177, 708, 72, 158, 281, 238, 1086, 240, 192, 87, 391, 438, 452, 416, 335, 9, 80, 252, 18, 300, 145, 81, 182, 126, 299, 304, 399, 140, 412, 37, 246, 336, 359, 833, 119, 11, 348, 221, 188, 569, 291, 253, 209, 235, 157, 225, 137, 389, 306, 116, 27, 183, 340, 184, 340, 180, 359, 343, 458, 146, 419, 169, 276, 132, 329, 627, 27, 682, 101, 283, 471, 462, 316, 107, 298, 197, 327, 200, 226, 235, 154, 280, 125, 239, 248, 209, 319, 311, 169, 343, 262, 120, 252, 151, 330, 247, 94, 167, 681, 210, 295, 292, 147, 187, 128, 290, 247, 284, 145, 221, 83, 250, 201, 128, 236, 246, 294, 180, 155, 150, 295, 173, 179, 79, 332, 69, 208, 415, 191, 369, 194, 416, 136, 325, 264, 235, 291, 140, 333, 509, 191, 256, 222, 299, 88, 96, 915, 277, 38, 189, 285, 175, 174, 300, 229, 235, 289, 385, 210, 1701, 396, 232, 249, 463, 220, 119, 443, 341, 255, 112, 398, 1072, 321, 60, 184, 167, 226, 170, 392, 305, 265, 306, 178, 211, 69, 921, 143, 895, 329, 263, 325, 224, 271, 252, 368, 301, 568, 364, 293, 343, 92, 195, 141, 325, 204, 122, 284, 78, 523, 470, 141, 747, 199, 208, 412, 230, 908, 324, 288, 320, 559, 224, 186, 740, 117, 205, 268, 148, 266, 369, 87, 388, 348, 158, 297, 222, 371, 358, 523, 518, 48, 18, 443, 94, 206, 185, 332, 383, 485, 302, 477, 116, 221, 335, 403, 257, 245, 284, 174, 204, 198, 336, 185, 118, 394, 331, 285, 241, 291, 368, 363, 259, 115, 399, 172, 908, 154, 412, 67, 156, 118, 215, 108, 180, 249, 154]}, "baseline": {"name": "chosen", "wins": 177, "lengths": [17, 328, 15, 432, 390, 96, 26, 56, 234, 22, 47, 79, 12, 326, 44, 29, 210, 72, 121, 520, 8, 172, 948, 187, 80, 83, 33, 1639, 109, 177, 114, 174, 45, 124, 205, 105, 124, 40, 542, 155, 519, 247, 185, 73, 34, 26, 465, 311, 67, 31, 95, 171, 53, 335, 60, 24, 67, 38, 132, 76, 46, 830, 53, 91, 53, 360, 20, 75, 98, 91, 156, 21, 83, 15, 122, 3, 12, 130, 446, 6, 78, 6, 61, 40, 180, 11, 703, 114, 198, 63, 35, 196, 415, 326, 7, 35, 32, 62, 11, 22, 19, 173, 49, 38, 110, 23, 50, 15, 276, 267, 28, 37, 34, 69, 66, 238, 147, 122, 99, 472, 31, 38, 218, 178, 41, 22, 44, 77, 284, 437, 26, 93, 53, 165, 61, 75, 81, 131, 332, 618, 107, 190, 64, 306, 3, 74, 21, 58, 9, 135, 278, 108, 34, 195, 858, 185, 428, 478, 242, 390, 125, 69, 150, 130, 259, 146, 451, 51, 138, 41, 22, 10, 26, 85, 57, 108, 62, 41, 363, 50, 77, 56, 122, 32, 75, 39, 61, 185, 551, 133, 168, 50, 454, 137, 261, 260, 73, 107, 145, 47, 705, 104, 503, 178, 368, 305, 69, 256, 6, 446, 140, 78, 138, 84, 112, 131, 168, 23, 29, 178, 47, 24, 31, 72, 551, 14, 32, 36, 21, 53, 122, 14, 842, 121, 66, 27, 188, 733, 53, 93, 298, 13, 19, 243, 9, 497, 122, 105, 12, 246, 28, 114, 479, 24, 720, 30, 44, 15, 217, 814, 31, 58, 41, 206, 27, 600, 22, 209, 19, 95, 18, 61, 204, 128, 9, 293, 34, 77, 12, 102, 391, 205, 85, 893, 122, 51, 355, 205, 394, 40, 103, 325, 41, 12, 33, 21, 12, 310, 54, 417, 57, 24, 308, 19, 260, 37, 37, 9, 61, 61, 227, 50, 103, 145, 203, 17, 22, 3, 134, 218, 154, 48, 28, 115, 720, 78, 50, 47, 210, 24, 129, 391, 135, 50, 28, 8, 236, 307, 387, 371, 195, 55, 227, 152, 81, 7, 41, 63, 188, 38, 240, 39, 41, 237, 96, 319, 55, 51, 131, 129, 170, 56, 409, 318, 242, 307, 92, 155, 66, 25, 93, 238, 557, 372, 28, 14, 620, 15, 96, 78, 615, 320, 121, 265, 155, 54, 149, 160, 94, 28, 233, 42, 56, 90, 166, 36, 30, 259, 139, 66, 261, 17, 453, 165, 108, 161, 71, 208, 24, 132, 55, 238, 271, 886, 341, 528, 122, 22, 318, 227, 247, 83, 36, 409, 25, 10, 26, 278, 363, 520, 47, 14, 28, 102, 600, 341, 32, 390, 48, 326, 390, 107, 13, 432, 121, 551, 618, 115, 34, 196, 44, 9, 61, 92, 335, 51, 11, 121, 217, 204, 93, 41, 54, 319, 243, 57, 27, 39, 108, 6, 34, 122, 47, 122, 236, 234, 150, 195, 173, 195, 9, 96, 155, 15, 108, 103, 75, 620, 156, 38, 446, 129, 372, 227, 15, 61, 56, 454, 17, 132, 26, 77, 37, 104, 33, 132, 15, 72, 28, 19, 22, 78]}, "config": {"seed": 1, "exp_name": "kto-unary_mistral7b_oasst", "datasets": ["oasst"], "mode": "sample", "debug": false, "use_fsdp": true, "fsdp_port": 59355, "wandb": {"enabled": true, "entity": null, "project": "archangel"}, "cache_dir": "/data/models/archangel", "local_run_dir": "/data/models/archangel/kto-unary_mistral7b_oasst", "do_first_eval": true, "minimum_log_interval_secs": 1.0, "intermediate_checkpoints": false, "trainer": "BasicTrainer", "lr": 5e-07, "n_epochs": 1, "n_examples": null, "optimizer": "RMSprop", "warmup_steps": 150, "eval_every": 20000, "n_samples": 512, "samples_dir": "samples/", "n_eval_examples": 512, "saved_policy": "/data/models/archangel/kto-unary_mistral7b_oasst/LATEST/policy.pt", "top_p": 0.95, "human_prefix": "\n<|user|>\n", "assistant_prefix": "\n<|assistant|>\n", "human_suffix": "", "assistant_suffix": "", "frac_unique_desirable": 1.0, "frac_unique_undesirable": 1.0, "model": {"name_or_path": "mistralai/Mistral-7B-v0.1", "tokenizer_name_or_path": null, "load_from": null, "block_name": "MistralDecoderLayer", "policy_dtype": "bfloat16", "fsdp_policy_mp": null, "reference_dtype": "bfloat16", "max_grad_norm": 10.0, "v_head_max_grad_norm": 0.1, "max_length": 2048, "max_prompt_length": 1024, "activation_checkpointing": true, "batch_size": 32, "gradient_accumulation_steps": 1, "eval_batch_size": 32, "use_flash_attention": true}, "loss": {"name": "kto", "beta": 0.1, "trainer": "KTOTrainer", "dataloader": "UnaryDataLoader", "use_reference_model": true, "desirable_weight": 1.0, "undesirable_weight": 1.0}}}