-
Notifications
You must be signed in to change notification settings - Fork 0
/
training_log.txt
9708 lines (9708 loc) · 607 KB
/
training_log.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Epoch [1/3], Step [1/3236], Loss: 9.2074, Perplexity: 9971.0991
Epoch [1/3], Step [2/3236], Loss: 9.0559, Perplexity: 8569.1387
Epoch [1/3], Step [3/3236], Loss: 8.9044, Perplexity: 7364.1709
Epoch [1/3], Step [4/3236], Loss: 8.7128, Perplexity: 6080.0058
Epoch [1/3], Step [5/3236], Loss: 8.2568, Perplexity: 3853.7669
Epoch [1/3], Step [6/3236], Loss: 7.8992, Perplexity: 2695.2242
Epoch [1/3], Step [7/3236], Loss: 7.0748, Perplexity: 1181.7519
Epoch [1/3], Step [8/3236], Loss: 6.2328, Perplexity: 509.1559
Epoch [1/3], Step [9/3236], Loss: 5.6064, Perplexity: 272.1659
Epoch [1/3], Step [10/3236], Loss: 5.3177, Perplexity: 203.9087
Epoch [1/3], Step [11/3236], Loss: 5.2345, Perplexity: 187.6309
Epoch [1/3], Step [12/3236], Loss: 5.1802, Perplexity: 177.7202
Epoch [1/3], Step [13/3236], Loss: 5.0887, Perplexity: 162.1813
Epoch [1/3], Step [14/3236], Loss: 5.2100, Perplexity: 183.0936
Epoch [1/3], Step [15/3236], Loss: 5.4153, Perplexity: 224.8217
Epoch [1/3], Step [16/3236], Loss: 4.7276, Perplexity: 113.0278
Epoch [1/3], Step [17/3236], Loss: 4.8178, Perplexity: 123.6982
Epoch [1/3], Step [18/3236], Loss: 4.8233, Perplexity: 124.3742
Epoch [1/3], Step [19/3236], Loss: 4.8232, Perplexity: 124.3645
Epoch [1/3], Step [20/3236], Loss: 4.8637, Perplexity: 129.4985
Epoch [1/3], Step [21/3236], Loss: 4.7208, Perplexity: 112.2584
Epoch [1/3], Step [22/3236], Loss: 4.6124, Perplexity: 100.7260
Epoch [1/3], Step [23/3236], Loss: 4.6654, Perplexity: 106.2063
Epoch [1/3], Step [24/3236], Loss: 4.6078, Perplexity: 100.2662
Epoch [1/3], Step [25/3236], Loss: 4.5653, Perplexity: 96.0932
Epoch [1/3], Step [26/3236], Loss: 4.7316, Perplexity: 113.4755
Epoch [1/3], Step [27/3236], Loss: 4.4951, Perplexity: 89.5772
Epoch [1/3], Step [28/3236], Loss: 4.5164, Perplexity: 91.5035
Epoch [1/3], Step [29/3236], Loss: 4.4536, Perplexity: 85.9335
Epoch [1/3], Step [30/3236], Loss: 4.5179, Perplexity: 91.6433
Epoch [1/3], Step [31/3236], Loss: 4.5320, Perplexity: 92.9416
Epoch [1/3], Step [32/3236], Loss: 4.4096, Perplexity: 82.2367
Epoch [1/3], Step [33/3236], Loss: 4.4790, Perplexity: 88.1504
Epoch [1/3], Step [34/3236], Loss: 4.6706, Perplexity: 106.7610
Epoch [1/3], Step [35/3236], Loss: 4.5446, Perplexity: 94.1210
Epoch [1/3], Step [36/3236], Loss: 4.3708, Perplexity: 79.1070
Epoch [1/3], Step [37/3236], Loss: 4.3754, Perplexity: 79.4742
Epoch [1/3], Step [38/3236], Loss: 4.2960, Perplexity: 73.4088
Epoch [1/3], Step [39/3236], Loss: 4.2328, Perplexity: 68.9099
Epoch [1/3], Step [40/3236], Loss: 4.3319, Perplexity: 76.0906
Epoch [1/3], Step [41/3236], Loss: 4.3038, Perplexity: 73.9774
Epoch [1/3], Step [42/3236], Loss: 4.2249, Perplexity: 68.3664
Epoch [1/3], Step [43/3236], Loss: 4.3781, Perplexity: 79.6839
Epoch [1/3], Step [44/3236], Loss: 4.5484, Perplexity: 94.4810
Epoch [1/3], Step [45/3236], Loss: 4.2087, Perplexity: 67.2711
Epoch [1/3], Step [46/3236], Loss: 4.1453, Perplexity: 63.1353
Epoch [1/3], Step [47/3236], Loss: 4.3169, Perplexity: 74.9541
Epoch [1/3], Step [48/3236], Loss: 4.1681, Perplexity: 64.5936
Epoch [1/3], Step [49/3236], Loss: 4.1522, Perplexity: 63.5760
Epoch [1/3], Step [50/3236], Loss: 4.0536, Perplexity: 57.6050
Epoch [1/3], Step [51/3236], Loss: 4.8521, Perplexity: 128.0112
Epoch [1/3], Step [52/3236], Loss: 4.1742, Perplexity: 64.9856
Epoch [1/3], Step [53/3236], Loss: 4.5702, Perplexity: 96.5590
Epoch [1/3], Step [54/3236], Loss: 4.1209, Perplexity: 61.6176
Epoch [1/3], Step [55/3236], Loss: 4.2278, Perplexity: 68.5692
Epoch [1/3], Step [56/3236], Loss: 4.0813, Perplexity: 59.2205
Epoch [1/3], Step [57/3236], Loss: 4.1281, Perplexity: 62.0580
Epoch [1/3], Step [58/3236], Loss: 4.0760, Perplexity: 58.9111
Epoch [1/3], Step [59/3236], Loss: 4.2013, Perplexity: 66.7716
Epoch [1/3], Step [60/3236], Loss: 4.0416, Perplexity: 56.9175
Epoch [1/3], Step [61/3236], Loss: 4.1470, Perplexity: 63.2451
Epoch [1/3], Step [62/3236], Loss: 4.0052, Perplexity: 54.8817
Epoch [1/3], Step [63/3236], Loss: 4.0595, Perplexity: 57.9436
Epoch [1/3], Step [64/3236], Loss: 3.9794, Perplexity: 53.4850
Epoch [1/3], Step [65/3236], Loss: 4.0664, Perplexity: 58.3451
Epoch [1/3], Step [66/3236], Loss: 4.8803, Perplexity: 131.6647
Epoch [1/3], Step [67/3236], Loss: 4.0321, Perplexity: 56.3818
Epoch [1/3], Step [68/3236], Loss: 4.3702, Perplexity: 79.0599
Epoch [1/3], Step [69/3236], Loss: 4.6115, Perplexity: 100.6348
Epoch [1/3], Step [70/3236], Loss: 4.0102, Perplexity: 55.1573
Epoch [1/3], Step [71/3236], Loss: 3.8959, Perplexity: 49.2004
Epoch [1/3], Step [72/3236], Loss: 4.0810, Perplexity: 59.2049
Epoch [1/3], Step [73/3236], Loss: 3.9111, Perplexity: 49.9520
Epoch [1/3], Step [74/3236], Loss: 4.0747, Perplexity: 58.8354
Epoch [1/3], Step [75/3236], Loss: 4.0023, Perplexity: 54.7221
Epoch [1/3], Step [76/3236], Loss: 4.3395, Perplexity: 76.6664
Epoch [1/3], Step [77/3236], Loss: 3.8838, Perplexity: 48.6062
Epoch [1/3], Step [78/3236], Loss: 4.4794, Perplexity: 88.1852
Epoch [1/3], Step [79/3236], Loss: 4.0486, Perplexity: 57.3186
Epoch [1/3], Step [80/3236], Loss: 3.9362, Perplexity: 51.2239
Epoch [1/3], Step [81/3236], Loss: 3.9327, Perplexity: 51.0431
Epoch [1/3], Step [82/3236], Loss: 3.9042, Perplexity: 49.6090
Epoch [1/3], Step [83/3236], Loss: 4.2714, Perplexity: 71.6198
Epoch [1/3], Step [84/3236], Loss: 3.8088, Perplexity: 45.0974
Epoch [1/3], Step [85/3236], Loss: 3.8764, Perplexity: 48.2480
Epoch [1/3], Step [86/3236], Loss: 3.9619, Perplexity: 52.5575
Epoch [1/3], Step [87/3236], Loss: 3.9181, Perplexity: 50.3047
Epoch [1/3], Step [88/3236], Loss: 4.2296, Perplexity: 68.6917
Epoch [1/3], Step [89/3236], Loss: 4.4092, Perplexity: 82.2065
Epoch [1/3], Step [90/3236], Loss: 3.9192, Perplexity: 50.3623
Epoch [1/3], Step [91/3236], Loss: 3.7880, Perplexity: 44.1689
Epoch [1/3], Step [92/3236], Loss: 3.7743, Perplexity: 43.5686
Epoch [1/3], Step [93/3236], Loss: 3.7990, Perplexity: 44.6581
Epoch [1/3], Step [94/3236], Loss: 3.8203, Perplexity: 45.6166
Epoch [1/3], Step [95/3236], Loss: 3.9275, Perplexity: 50.7799
Epoch [1/3], Step [96/3236], Loss: 3.8648, Perplexity: 47.6922
Epoch [1/3], Step [97/3236], Loss: 3.7868, Perplexity: 44.1139
Epoch [1/3], Step [98/3236], Loss: 3.9210, Perplexity: 50.4516
Epoch [1/3], Step [99/3236], Loss: 3.8002, Perplexity: 44.7098
Epoch [1/3], Step [100/3236], Loss: 4.0337, Perplexity: 56.4693
Epoch [1/3], Step [101/3236], Loss: 3.8043, Perplexity: 44.8953
Epoch [1/3], Step [102/3236], Loss: 3.6890, Perplexity: 40.0046
Epoch [1/3], Step [103/3236], Loss: 3.7702, Perplexity: 43.3881
Epoch [1/3], Step [104/3236], Loss: 3.7135, Perplexity: 40.9967
Epoch [1/3], Step [105/3236], Loss: 3.7574, Perplexity: 42.8364
Epoch [1/3], Step [106/3236], Loss: 4.0484, Perplexity: 57.3077
Epoch [1/3], Step [107/3236], Loss: 3.7564, Perplexity: 42.7926
Epoch [1/3], Step [108/3236], Loss: 3.8896, Perplexity: 48.8926
Epoch [1/3], Step [109/3236], Loss: 3.9323, Perplexity: 51.0242
Epoch [1/3], Step [110/3236], Loss: 3.7873, Perplexity: 44.1370
Epoch [1/3], Step [111/3236], Loss: 3.7921, Perplexity: 44.3498
Epoch [1/3], Step [112/3236], Loss: 3.7328, Perplexity: 41.7980
Epoch [1/3], Step [113/3236], Loss: 3.7268, Perplexity: 41.5475
Epoch [1/3], Step [114/3236], Loss: 4.2500, Perplexity: 70.1081
Epoch [1/3], Step [115/3236], Loss: 3.7192, Perplexity: 41.2322
Epoch [1/3], Step [116/3236], Loss: 3.7828, Perplexity: 43.9405
Epoch [1/3], Step [117/3236], Loss: 3.6932, Perplexity: 40.1743
Epoch [1/3], Step [118/3236], Loss: 3.6115, Perplexity: 37.0201
Epoch [1/3], Step [119/3236], Loss: 3.6681, Perplexity: 39.1763
Epoch [1/3], Step [120/3236], Loss: 3.7813, Perplexity: 43.8745
Epoch [1/3], Step [121/3236], Loss: 3.8499, Perplexity: 46.9881
Epoch [1/3], Step [122/3236], Loss: 3.7034, Perplexity: 40.5834
Epoch [1/3], Step [123/3236], Loss: 3.7147, Perplexity: 41.0443
Epoch [1/3], Step [124/3236], Loss: 3.7257, Perplexity: 41.5023
Epoch [1/3], Step [125/3236], Loss: 3.8021, Perplexity: 44.7961
Epoch [1/3], Step [126/3236], Loss: 3.5834, Perplexity: 35.9962
Epoch [1/3], Step [127/3236], Loss: 3.8172, Perplexity: 45.4779
Epoch [1/3], Step [128/3236], Loss: 3.6394, Perplexity: 38.0701
Epoch [1/3], Step [129/3236], Loss: 3.6363, Perplexity: 37.9525
Epoch [1/3], Step [130/3236], Loss: 3.6813, Perplexity: 39.6997
Epoch [1/3], Step [131/3236], Loss: 3.6842, Perplexity: 39.8136
Epoch [1/3], Step [132/3236], Loss: 3.6948, Perplexity: 40.2390
Epoch [1/3], Step [133/3236], Loss: 3.4712, Perplexity: 32.1765
Epoch [1/3], Step [134/3236], Loss: 3.5564, Perplexity: 35.0356
Epoch [1/3], Step [135/3236], Loss: 3.6076, Perplexity: 36.8790
Epoch [1/3], Step [136/3236], Loss: 3.7536, Perplexity: 42.6762
Epoch [1/3], Step [137/3236], Loss: 3.5915, Perplexity: 36.2890
Epoch [1/3], Step [138/3236], Loss: 4.0280, Perplexity: 56.1484
Epoch [1/3], Step [139/3236], Loss: 3.5818, Perplexity: 35.9386
Epoch [1/3], Step [140/3236], Loss: 3.7307, Perplexity: 41.7074
Epoch [1/3], Step [141/3236], Loss: 3.6326, Perplexity: 37.8126
Epoch [1/3], Step [142/3236], Loss: 3.6423, Perplexity: 38.1792
Epoch [1/3], Step [143/3236], Loss: 3.4708, Perplexity: 32.1623
Epoch [1/3], Step [144/3236], Loss: 4.0719, Perplexity: 58.6701
Epoch [1/3], Step [145/3236], Loss: 3.6128, Perplexity: 37.0694
Epoch [1/3], Step [146/3236], Loss: 4.4026, Perplexity: 81.6594
Epoch [1/3], Step [147/3236], Loss: 3.8537, Perplexity: 47.1671
Epoch [1/3], Step [148/3236], Loss: 3.6843, Perplexity: 39.8166
Epoch [1/3], Step [149/3236], Loss: 3.6270, Perplexity: 37.6010
Epoch [1/3], Step [150/3236], Loss: 3.7239, Perplexity: 41.4274
Epoch [1/3], Step [151/3236], Loss: 3.6712, Perplexity: 39.2996
Epoch [1/3], Step [152/3236], Loss: 3.7269, Perplexity: 41.5506
Epoch [1/3], Step [153/3236], Loss: 3.5361, Perplexity: 34.3337
Epoch [1/3], Step [154/3236], Loss: 3.6141, Perplexity: 37.1186
Epoch [1/3], Step [155/3236], Loss: 3.5294, Perplexity: 34.1020
Epoch [1/3], Step [156/3236], Loss: 3.7214, Perplexity: 41.3221
Epoch [1/3], Step [157/3236], Loss: 3.6243, Perplexity: 37.4969
Epoch [1/3], Step [158/3236], Loss: 3.7389, Perplexity: 42.0529
Epoch [1/3], Step [159/3236], Loss: 3.7025, Perplexity: 40.5481
Epoch [1/3], Step [160/3236], Loss: 3.6144, Perplexity: 37.1286
Epoch [1/3], Step [161/3236], Loss: 3.6746, Perplexity: 39.4318
Epoch [1/3], Step [162/3236], Loss: 3.8175, Perplexity: 45.4911
Epoch [1/3], Step [163/3236], Loss: 3.7539, Perplexity: 42.6851
Epoch [1/3], Step [164/3236], Loss: 3.6697, Perplexity: 39.2389
Epoch [1/3], Step [165/3236], Loss: 3.5008, Perplexity: 33.1416
Epoch [1/3], Step [166/3236], Loss: 3.8001, Perplexity: 44.7056
Epoch [1/3], Step [167/3236], Loss: 3.5243, Perplexity: 33.9284
Epoch [1/3], Step [168/3236], Loss: 3.5682, Perplexity: 35.4539
Epoch [1/3], Step [169/3236], Loss: 3.5825, Perplexity: 35.9643
Epoch [1/3], Step [170/3236], Loss: 3.6222, Perplexity: 37.4181
Epoch [1/3], Step [171/3236], Loss: 3.5330, Perplexity: 34.2252
Epoch [1/3], Step [172/3236], Loss: 3.6451, Perplexity: 38.2865
Epoch [1/3], Step [173/3236], Loss: 3.4728, Perplexity: 32.2282
Epoch [1/3], Step [174/3236], Loss: 3.6134, Perplexity: 37.0927
Epoch [1/3], Step [175/3236], Loss: 3.8127, Perplexity: 45.2733
Epoch [1/3], Step [176/3236], Loss: 3.4370, Perplexity: 31.0927
Epoch [1/3], Step [177/3236], Loss: 3.5371, Perplexity: 34.3675
Epoch [1/3], Step [178/3236], Loss: 3.5884, Perplexity: 36.1777
Epoch [1/3], Step [179/3236], Loss: 3.4052, Perplexity: 30.1194
Epoch [1/3], Step [180/3236], Loss: 3.4205, Perplexity: 30.5848
Epoch [1/3], Step [181/3236], Loss: 3.3794, Perplexity: 29.3530
Epoch [1/3], Step [182/3236], Loss: 4.1145, Perplexity: 61.2239
Epoch [1/3], Step [183/3236], Loss: 3.3077, Perplexity: 27.3215
Epoch [1/3], Step [184/3236], Loss: 4.6241, Perplexity: 101.9134
Epoch [1/3], Step [185/3236], Loss: 3.5696, Perplexity: 35.5016
Epoch [1/3], Step [186/3236], Loss: 3.3692, Perplexity: 29.0555
Epoch [1/3], Step [187/3236], Loss: 3.5375, Perplexity: 34.3821
Epoch [1/3], Step [188/3236], Loss: 3.4795, Perplexity: 32.4448
Epoch [1/3], Step [189/3236], Loss: 3.4843, Perplexity: 32.5993
Epoch [1/3], Step [190/3236], Loss: 3.3526, Perplexity: 28.5781
Epoch [1/3], Step [191/3236], Loss: 3.3476, Perplexity: 28.4333
Epoch [1/3], Step [192/3236], Loss: 3.6415, Perplexity: 38.1502
Epoch [1/3], Step [193/3236], Loss: 3.4159, Perplexity: 30.4443
Epoch [1/3], Step [194/3236], Loss: 3.5194, Perplexity: 33.7640
Epoch [1/3], Step [195/3236], Loss: 3.4656, Perplexity: 31.9967
Epoch [1/3], Step [196/3236], Loss: 3.4788, Perplexity: 32.4194
Epoch [1/3], Step [197/3236], Loss: 3.6849, Perplexity: 39.8408
Epoch [1/3], Step [198/3236], Loss: 3.4118, Perplexity: 30.3186
Epoch [1/3], Step [199/3236], Loss: 4.0704, Perplexity: 58.5797
Epoch [1/3], Step [200/3236], Loss: 3.3766, Perplexity: 29.2698
Epoch [1/3], Step [201/3236], Loss: 3.6498, Perplexity: 38.4662
Epoch [1/3], Step [202/3236], Loss: 3.2835, Perplexity: 26.6682
Epoch [1/3], Step [203/3236], Loss: 3.3052, Perplexity: 27.2547
Epoch [1/3], Step [204/3236], Loss: 3.5920, Perplexity: 36.3079
Epoch [1/3], Step [205/3236], Loss: 3.2511, Perplexity: 25.8176
Epoch [1/3], Step [206/3236], Loss: 3.2647, Perplexity: 26.1712
Epoch [1/3], Step [207/3236], Loss: 3.3743, Perplexity: 29.2051
Epoch [1/3], Step [208/3236], Loss: 3.4941, Perplexity: 32.9213
Epoch [1/3], Step [209/3236], Loss: 3.6360, Perplexity: 37.9398
Epoch [1/3], Step [210/3236], Loss: 3.4018, Perplexity: 30.0193
Epoch [1/3], Step [211/3236], Loss: 3.5336, Perplexity: 34.2486
Epoch [1/3], Step [212/3236], Loss: 3.5306, Perplexity: 34.1458
Epoch [1/3], Step [213/3236], Loss: 3.2928, Perplexity: 26.9177
Epoch [1/3], Step [214/3236], Loss: 3.4710, Perplexity: 32.1690
Epoch [1/3], Step [215/3236], Loss: 3.4725, Perplexity: 32.2178
Epoch [1/3], Step [216/3236], Loss: 3.5442, Perplexity: 34.6118
Epoch [1/3], Step [217/3236], Loss: 3.4789, Perplexity: 32.4231
Epoch [1/3], Step [218/3236], Loss: 3.4513, Perplexity: 31.5428
Epoch [1/3], Step [219/3236], Loss: 3.4146, Perplexity: 30.4058
Epoch [1/3], Step [220/3236], Loss: 3.8760, Perplexity: 48.2322
Epoch [1/3], Step [221/3236], Loss: 3.6699, Perplexity: 39.2476
Epoch [1/3], Step [222/3236], Loss: 3.3711, Perplexity: 29.1091
Epoch [1/3], Step [223/3236], Loss: 3.5976, Perplexity: 36.5103
Epoch [1/3], Step [224/3236], Loss: 3.6598, Perplexity: 38.8552
Epoch [1/3], Step [225/3236], Loss: 3.3319, Perplexity: 27.9911
Epoch [1/3], Step [226/3236], Loss: 4.1944, Perplexity: 66.3110
Epoch [1/3], Step [227/3236], Loss: 3.3369, Perplexity: 28.1323
Epoch [1/3], Step [228/3236], Loss: 3.4713, Perplexity: 32.1791
Epoch [1/3], Step [229/3236], Loss: 3.5034, Perplexity: 33.2297
Epoch [1/3], Step [230/3236], Loss: 3.8595, Perplexity: 47.4418
Epoch [1/3], Step [231/3236], Loss: 3.3217, Perplexity: 27.7083
Epoch [1/3], Step [232/3236], Loss: 3.4625, Perplexity: 31.8956
Epoch [1/3], Step [233/3236], Loss: 3.3391, Perplexity: 28.1944
Epoch [1/3], Step [234/3236], Loss: 3.5731, Perplexity: 35.6273
Epoch [1/3], Step [235/3236], Loss: 3.2887, Perplexity: 26.8076
Epoch [1/3], Step [236/3236], Loss: 3.3397, Perplexity: 28.2098
Epoch [1/3], Step [237/3236], Loss: 3.6820, Perplexity: 39.7268
Epoch [1/3], Step [238/3236], Loss: 3.4409, Perplexity: 31.2137
Epoch [1/3], Step [239/3236], Loss: 3.5566, Perplexity: 35.0435
Epoch [1/3], Step [240/3236], Loss: 3.3506, Perplexity: 28.5199
Epoch [1/3], Step [241/3236], Loss: 3.4602, Perplexity: 31.8248
Epoch [1/3], Step [242/3236], Loss: 3.2585, Perplexity: 26.0095
Epoch [1/3], Step [243/3236], Loss: 3.3850, Perplexity: 29.5175
Epoch [1/3], Step [244/3236], Loss: 3.3700, Perplexity: 29.0787
Epoch [1/3], Step [245/3236], Loss: 3.8614, Perplexity: 47.5317
Epoch [1/3], Step [246/3236], Loss: 3.3061, Perplexity: 27.2794
Epoch [1/3], Step [247/3236], Loss: 3.3614, Perplexity: 28.8298
Epoch [1/3], Step [248/3236], Loss: 3.7443, Perplexity: 42.2789
Epoch [1/3], Step [249/3236], Loss: 3.3536, Perplexity: 28.6051
Epoch [1/3], Step [250/3236], Loss: 3.5930, Perplexity: 36.3411
Epoch [1/3], Step [251/3236], Loss: 3.4791, Perplexity: 32.4312
Epoch [1/3], Step [252/3236], Loss: 3.2242, Perplexity: 25.1347
Epoch [1/3], Step [253/3236], Loss: 3.4059, Perplexity: 30.1414
Epoch [1/3], Step [254/3236], Loss: 3.5432, Perplexity: 34.5763
Epoch [1/3], Step [255/3236], Loss: 3.2910, Perplexity: 26.8685
Epoch [1/3], Step [256/3236], Loss: 3.2928, Perplexity: 26.9179
Epoch [1/3], Step [257/3236], Loss: 3.4497, Perplexity: 31.4896
Epoch [1/3], Step [258/3236], Loss: 3.4113, Perplexity: 30.3036
Epoch [1/3], Step [259/3236], Loss: 3.5723, Perplexity: 35.5990
Epoch [1/3], Step [260/3236], Loss: 3.3434, Perplexity: 28.3149
Epoch [1/3], Step [261/3236], Loss: 3.4666, Perplexity: 32.0282
Epoch [1/3], Step [262/3236], Loss: 3.3809, Perplexity: 29.3972
Epoch [1/3], Step [263/3236], Loss: 3.2823, Perplexity: 26.6379
Epoch [1/3], Step [264/3236], Loss: 3.2957, Perplexity: 26.9958
Epoch [1/3], Step [265/3236], Loss: 4.2631, Perplexity: 71.0300
Epoch [1/3], Step [266/3236], Loss: 3.1797, Perplexity: 24.0397
Epoch [1/3], Step [267/3236], Loss: 3.6141, Perplexity: 37.1170
Epoch [1/3], Step [268/3236], Loss: 3.3794, Perplexity: 29.3540
Epoch [1/3], Step [269/3236], Loss: 3.3168, Perplexity: 27.5708
Epoch [1/3], Step [270/3236], Loss: 3.3267, Perplexity: 27.8452
Epoch [1/3], Step [271/3236], Loss: 3.4546, Perplexity: 31.6453
Epoch [1/3], Step [272/3236], Loss: 3.2927, Perplexity: 26.9160
Epoch [1/3], Step [273/3236], Loss: 3.4761, Perplexity: 32.3340
Epoch [1/3], Step [274/3236], Loss: 3.2616, Perplexity: 26.0924
Epoch [1/3], Step [275/3236], Loss: 3.3694, Perplexity: 29.0608
Epoch [1/3], Step [276/3236], Loss: 3.3331, Perplexity: 28.0254
Epoch [1/3], Step [277/3236], Loss: 3.3428, Perplexity: 28.2988
Epoch [1/3], Step [278/3236], Loss: 3.2306, Perplexity: 25.2953
Epoch [1/3], Step [279/3236], Loss: 3.1335, Perplexity: 22.9546
Epoch [1/3], Step [280/3236], Loss: 3.9199, Perplexity: 50.3949
Epoch [1/3], Step [281/3236], Loss: 3.2978, Perplexity: 27.0534
Epoch [1/3], Step [282/3236], Loss: 4.1304, Perplexity: 62.2000
Epoch [1/3], Step [283/3236], Loss: 4.0054, Perplexity: 54.8951
Epoch [1/3], Step [284/3236], Loss: 3.4448, Perplexity: 31.3369
Epoch [1/3], Step [285/3236], Loss: 3.2363, Perplexity: 25.4387
Epoch [1/3], Step [286/3236], Loss: 3.2084, Perplexity: 24.7391
Epoch [1/3], Step [287/3236], Loss: 3.4126, Perplexity: 30.3433
Epoch [1/3], Step [288/3236], Loss: 3.3320, Perplexity: 27.9937
Epoch [1/3], Step [289/3236], Loss: 3.2265, Perplexity: 25.1907
Epoch [1/3], Step [290/3236], Loss: 3.4724, Perplexity: 32.2147
Epoch [1/3], Step [291/3236], Loss: 3.2784, Perplexity: 26.5330
Epoch [1/3], Step [292/3236], Loss: 3.7265, Perplexity: 41.5321
Epoch [1/3], Step [293/3236], Loss: 3.4153, Perplexity: 30.4248
Epoch [1/3], Step [294/3236], Loss: 3.3980, Perplexity: 29.9038
Epoch [1/3], Step [295/3236], Loss: 3.3365, Perplexity: 28.1213
Epoch [1/3], Step [296/3236], Loss: 3.6670, Perplexity: 39.1344
Epoch [1/3], Step [297/3236], Loss: 3.1524, Perplexity: 23.3921
Epoch [1/3], Step [298/3236], Loss: 3.3173, Perplexity: 27.5865
Epoch [1/3], Step [299/3236], Loss: 3.1456, Perplexity: 23.2328
Epoch [1/3], Step [300/3236], Loss: 3.1631, Perplexity: 23.6430
Epoch [1/3], Step [301/3236], Loss: 3.4341, Perplexity: 31.0043
Epoch [1/3], Step [302/3236], Loss: 3.1690, Perplexity: 23.7846
Epoch [1/3], Step [303/3236], Loss: 3.2009, Perplexity: 24.5542
Epoch [1/3], Step [304/3236], Loss: 3.2384, Perplexity: 25.4927
Epoch [1/3], Step [305/3236], Loss: 3.4216, Perplexity: 30.6171
Epoch [1/3], Step [306/3236], Loss: 3.7130, Perplexity: 40.9747
Epoch [1/3], Step [307/3236], Loss: 3.2238, Perplexity: 25.1224
Epoch [1/3], Step [308/3236], Loss: 3.3303, Perplexity: 27.9473
Epoch [1/3], Step [309/3236], Loss: 3.8373, Perplexity: 46.4014
Epoch [1/3], Step [310/3236], Loss: 3.1579, Perplexity: 23.5208
Epoch [1/3], Step [311/3236], Loss: 3.4803, Perplexity: 32.4696
Epoch [1/3], Step [312/3236], Loss: 3.4005, Perplexity: 29.9789
Epoch [1/3], Step [313/3236], Loss: 3.3987, Perplexity: 29.9240
Epoch [1/3], Step [314/3236], Loss: 3.1196, Perplexity: 22.6373
Epoch [1/3], Step [315/3236], Loss: 3.2362, Perplexity: 25.4373
Epoch [1/3], Step [316/3236], Loss: 3.1371, Perplexity: 23.0364
Epoch [1/3], Step [317/3236], Loss: 3.2052, Perplexity: 24.6615
Epoch [1/3], Step [318/3236], Loss: 3.2235, Perplexity: 25.1154
Epoch [1/3], Step [319/3236], Loss: 3.2452, Perplexity: 25.6677
Epoch [1/3], Step [320/3236], Loss: 3.6777, Perplexity: 39.5544
Epoch [1/3], Step [321/3236], Loss: 3.3078, Perplexity: 27.3255
Epoch [1/3], Step [322/3236], Loss: 3.8821, Perplexity: 48.5278
Epoch [1/3], Step [323/3236], Loss: 3.2390, Perplexity: 25.5084
Epoch [1/3], Step [324/3236], Loss: 3.1882, Perplexity: 24.2442
Epoch [1/3], Step [325/3236], Loss: 3.5282, Perplexity: 34.0627
Epoch [1/3], Step [326/3236], Loss: 3.3296, Perplexity: 27.9258
Epoch [1/3], Step [327/3236], Loss: 3.5578, Perplexity: 35.0876
Epoch [1/3], Step [328/3236], Loss: 3.1090, Perplexity: 22.3997
Epoch [1/3], Step [329/3236], Loss: 3.2927, Perplexity: 26.9153
Epoch [1/3], Step [330/3236], Loss: 3.3347, Perplexity: 28.0696
Epoch [1/3], Step [331/3236], Loss: 3.0945, Perplexity: 22.0757
Epoch [1/3], Step [332/3236], Loss: 3.7099, Perplexity: 40.8500
Epoch [1/3], Step [333/3236], Loss: 3.1199, Perplexity: 22.6432
Epoch [1/3], Step [334/3236], Loss: 3.3782, Perplexity: 29.3165
Epoch [1/3], Step [335/3236], Loss: 3.2099, Perplexity: 24.7769
Epoch [1/3], Step [336/3236], Loss: 3.4216, Perplexity: 30.6186
Epoch [1/3], Step [337/3236], Loss: 3.2510, Perplexity: 25.8173
Epoch [1/3], Step [338/3236], Loss: 3.2412, Perplexity: 25.5633
Epoch [1/3], Step [339/3236], Loss: 3.3506, Perplexity: 28.5189
Epoch [1/3], Step [340/3236], Loss: 3.2224, Perplexity: 25.0875
Epoch [1/3], Step [341/3236], Loss: 3.1393, Perplexity: 23.0877
Epoch [1/3], Step [342/3236], Loss: 3.5323, Perplexity: 34.2040
Epoch [1/3], Step [343/3236], Loss: 3.1978, Perplexity: 24.4780
Epoch [1/3], Step [344/3236], Loss: 3.4200, Perplexity: 30.5681
Epoch [1/3], Step [345/3236], Loss: 3.2121, Perplexity: 24.8317
Epoch [1/3], Step [346/3236], Loss: 3.5228, Perplexity: 33.8791
Epoch [1/3], Step [347/3236], Loss: 3.4949, Perplexity: 32.9457
Epoch [1/3], Step [348/3236], Loss: 3.6132, Perplexity: 37.0838
Epoch [1/3], Step [349/3236], Loss: 3.0852, Perplexity: 21.8723
Epoch [1/3], Step [350/3236], Loss: 3.2841, Perplexity: 26.6850
Epoch [1/3], Step [351/3236], Loss: 3.2916, Perplexity: 26.8857
Epoch [1/3], Step [352/3236], Loss: 3.1794, Perplexity: 24.0317
Epoch [1/3], Step [353/3236], Loss: 3.1455, Perplexity: 23.2316
Epoch [1/3], Step [354/3236], Loss: 3.1721, Perplexity: 23.8577
Epoch [1/3], Step [355/3236], Loss: 3.2100, Perplexity: 24.7785
Epoch [1/3], Step [356/3236], Loss: 3.1337, Perplexity: 22.9595
Epoch [1/3], Step [357/3236], Loss: 3.2997, Perplexity: 27.1033
Epoch [1/3], Step [358/3236], Loss: 3.1215, Perplexity: 22.6812
Epoch [1/3], Step [359/3236], Loss: 3.2993, Perplexity: 27.0928
Epoch [1/3], Step [360/3236], Loss: 4.0909, Perplexity: 59.7943
Epoch [1/3], Step [361/3236], Loss: 3.1566, Perplexity: 23.4896
Epoch [1/3], Step [362/3236], Loss: 3.5650, Perplexity: 35.3398
Epoch [1/3], Step [363/3236], Loss: 3.2972, Perplexity: 27.0377
Epoch [1/3], Step [364/3236], Loss: 3.0871, Perplexity: 21.9131
Epoch [1/3], Step [365/3236], Loss: 3.2033, Perplexity: 24.6127
Epoch [1/3], Step [366/3236], Loss: 3.1515, Perplexity: 23.3712
Epoch [1/3], Step [367/3236], Loss: 3.2562, Perplexity: 25.9508
Epoch [1/3], Step [368/3236], Loss: 3.0977, Perplexity: 22.1471
Epoch [1/3], Step [369/3236], Loss: 3.2012, Perplexity: 24.5619
Epoch [1/3], Step [370/3236], Loss: 3.4185, Perplexity: 30.5238
Epoch [1/3], Step [371/3236], Loss: 3.5416, Perplexity: 34.5233
Epoch [1/3], Step [372/3236], Loss: 3.2485, Perplexity: 25.7512
Epoch [1/3], Step [373/3236], Loss: 3.5711, Perplexity: 35.5552
Epoch [1/3], Step [374/3236], Loss: 3.2591, Perplexity: 26.0253
Epoch [1/3], Step [375/3236], Loss: 3.1464, Perplexity: 23.2524
Epoch [1/3], Step [376/3236], Loss: 3.1437, Perplexity: 23.1889
Epoch [1/3], Step [377/3236], Loss: 3.5383, Perplexity: 34.4079
Epoch [1/3], Step [378/3236], Loss: 3.4070, Perplexity: 30.1747
Epoch [1/3], Step [379/3236], Loss: 4.2136, Perplexity: 67.6020
Epoch [1/3], Step [380/3236], Loss: 3.3290, Perplexity: 27.9096
Epoch [1/3], Step [381/3236], Loss: 3.9271, Perplexity: 50.7605
Epoch [1/3], Step [382/3236], Loss: 3.2456, Perplexity: 25.6773
Epoch [1/3], Step [383/3236], Loss: 3.1288, Perplexity: 22.8460
Epoch [1/3], Step [384/3236], Loss: 3.2715, Perplexity: 26.3498
Epoch [1/3], Step [385/3236], Loss: 3.2851, Perplexity: 26.7121
Epoch [1/3], Step [386/3236], Loss: 3.1629, Perplexity: 23.6394
Epoch [1/3], Step [387/3236], Loss: 3.1661, Perplexity: 23.7155
Epoch [1/3], Step [388/3236], Loss: 3.0834, Perplexity: 21.8328
Epoch [1/3], Step [389/3236], Loss: 3.0974, Perplexity: 22.1408
Epoch [1/3], Step [390/3236], Loss: 3.7581, Perplexity: 42.8671
Epoch [1/3], Step [391/3236], Loss: 3.1623, Perplexity: 23.6243
Epoch [1/3], Step [392/3236], Loss: 3.1598, Perplexity: 23.5662
Epoch [1/3], Step [393/3236], Loss: 3.1898, Perplexity: 24.2827
Epoch [1/3], Step [394/3236], Loss: 3.3020, Perplexity: 27.1657
Epoch [1/3], Step [395/3236], Loss: 3.1496, Perplexity: 23.3277
Epoch [1/3], Step [396/3236], Loss: 4.3625, Perplexity: 78.4542
Epoch [1/3], Step [397/3236], Loss: 3.2371, Perplexity: 25.4599
Epoch [1/3], Step [398/3236], Loss: 2.8794, Perplexity: 17.8030
Epoch [1/3], Step [399/3236], Loss: 3.1983, Perplexity: 24.4900
Epoch [1/3], Step [400/3236], Loss: 3.1680, Perplexity: 23.7606
Epoch [1/3], Step [401/3236], Loss: 3.3688, Perplexity: 29.0446
Epoch [1/3], Step [402/3236], Loss: 3.3054, Perplexity: 27.2595
Epoch [1/3], Step [403/3236], Loss: 3.0837, Perplexity: 21.8389
Epoch [1/3], Step [404/3236], Loss: 3.1750, Perplexity: 23.9269
Epoch [1/3], Step [405/3236], Loss: 3.2043, Perplexity: 24.6389
Epoch [1/3], Step [406/3236], Loss: 3.1351, Perplexity: 22.9917
Epoch [1/3], Step [407/3236], Loss: 3.0860, Perplexity: 21.8884
Epoch [1/3], Step [408/3236], Loss: 3.1067, Perplexity: 22.3470
Epoch [1/3], Step [409/3236], Loss: 3.0868, Perplexity: 21.9067
Epoch [1/3], Step [410/3236], Loss: 3.1468, Perplexity: 23.2613
Epoch [1/3], Step [411/3236], Loss: 3.1052, Perplexity: 22.3148
Epoch [1/3], Step [412/3236], Loss: 3.2867, Perplexity: 26.7555
Epoch [1/3], Step [413/3236], Loss: 3.1007, Perplexity: 22.2141
Epoch [1/3], Step [414/3236], Loss: 3.2181, Perplexity: 24.9802
Epoch [1/3], Step [415/3236], Loss: 2.9817, Perplexity: 19.7208
Epoch [1/3], Step [416/3236], Loss: 3.3853, Perplexity: 29.5276
Epoch [1/3], Step [417/3236], Loss: 3.0649, Perplexity: 21.4319
Epoch [1/3], Step [418/3236], Loss: 3.1691, Perplexity: 23.7860
Epoch [1/3], Step [419/3236], Loss: 3.0897, Perplexity: 21.9713
Epoch [1/3], Step [420/3236], Loss: 3.3743, Perplexity: 29.2045
Epoch [1/3], Step [421/3236], Loss: 3.3470, Perplexity: 28.4161
Epoch [1/3], Step [422/3236], Loss: 3.3306, Perplexity: 27.9549
Epoch [1/3], Step [423/3236], Loss: 3.0222, Perplexity: 20.5357
Epoch [1/3], Step [424/3236], Loss: 3.0854, Perplexity: 21.8758
Epoch [1/3], Step [425/3236], Loss: 3.0674, Perplexity: 21.4854
Epoch [1/3], Step [426/3236], Loss: 3.1354, Perplexity: 22.9969
Epoch [1/3], Step [427/3236], Loss: 3.5919, Perplexity: 36.3013
Epoch [1/3], Step [428/3236], Loss: 3.1457, Perplexity: 23.2359
Epoch [1/3], Step [429/3236], Loss: 3.1645, Perplexity: 23.6777
Epoch [1/3], Step [430/3236], Loss: 3.1993, Perplexity: 24.5163
Epoch [1/3], Step [431/3236], Loss: 3.1545, Perplexity: 23.4409
Epoch [1/3], Step [432/3236], Loss: 3.0273, Perplexity: 20.6412
Epoch [1/3], Step [433/3236], Loss: 3.1909, Perplexity: 24.3106
Epoch [1/3], Step [434/3236], Loss: 3.3600, Perplexity: 28.7899
Epoch [1/3], Step [435/3236], Loss: 3.3390, Perplexity: 28.1919
Epoch [1/3], Step [436/3236], Loss: 3.2188, Perplexity: 24.9974
Epoch [1/3], Step [437/3236], Loss: 3.0250, Perplexity: 20.5947
Epoch [1/3], Step [438/3236], Loss: 3.0671, Perplexity: 21.4787
Epoch [1/3], Step [439/3236], Loss: 3.1214, Perplexity: 22.6782
Epoch [1/3], Step [440/3236], Loss: 3.2172, Perplexity: 24.9593
Epoch [1/3], Step [441/3236], Loss: 3.2774, Perplexity: 26.5066
Epoch [1/3], Step [442/3236], Loss: 3.1121, Perplexity: 22.4686
Epoch [1/3], Step [443/3236], Loss: 3.1786, Perplexity: 24.0138
Epoch [1/3], Step [444/3236], Loss: 3.5328, Perplexity: 34.2181
Epoch [1/3], Step [445/3236], Loss: 3.6481, Perplexity: 38.4010
Epoch [1/3], Step [446/3236], Loss: 2.9653, Perplexity: 19.3996
Epoch [1/3], Step [447/3236], Loss: 3.1495, Perplexity: 23.3246
Epoch [1/3], Step [448/3236], Loss: 3.2048, Perplexity: 24.6499
Epoch [1/3], Step [449/3236], Loss: 3.0597, Perplexity: 21.3221
Epoch [1/3], Step [450/3236], Loss: 3.2402, Perplexity: 25.5391
Epoch [1/3], Step [451/3236], Loss: 3.2487, Perplexity: 25.7573
Epoch [1/3], Step [452/3236], Loss: 3.5940, Perplexity: 36.3796
Epoch [1/3], Step [453/3236], Loss: 3.3191, Perplexity: 27.6347
Epoch [1/3], Step [454/3236], Loss: 3.0537, Perplexity: 21.1931
Epoch [1/3], Step [455/3236], Loss: 3.1197, Perplexity: 22.6405
Epoch [1/3], Step [456/3236], Loss: 3.5730, Perplexity: 35.6228
Epoch [1/3], Step [457/3236], Loss: 2.9531, Perplexity: 19.1645
Epoch [1/3], Step [458/3236], Loss: 3.0264, Perplexity: 20.6234
Epoch [1/3], Step [459/3236], Loss: 3.2685, Perplexity: 26.2710
Epoch [1/3], Step [460/3236], Loss: 3.1043, Perplexity: 22.2932
Epoch [1/3], Step [461/3236], Loss: 3.4249, Perplexity: 30.7181
Epoch [1/3], Step [462/3236], Loss: 3.2145, Perplexity: 24.8913
Epoch [1/3], Step [463/3236], Loss: 3.7458, Perplexity: 42.3423
Epoch [1/3], Step [464/3236], Loss: 3.2292, Perplexity: 25.2601
Epoch [1/3], Step [465/3236], Loss: 3.0337, Perplexity: 20.7747
Epoch [1/3], Step [466/3236], Loss: 3.2926, Perplexity: 26.9121
Epoch [1/3], Step [467/3236], Loss: 3.1942, Perplexity: 24.3912
Epoch [1/3], Step [468/3236], Loss: 3.0430, Perplexity: 20.9671
Epoch [1/3], Step [469/3236], Loss: 2.9626, Perplexity: 19.3478
Epoch [1/3], Step [470/3236], Loss: 3.1765, Perplexity: 23.9635
Epoch [1/3], Step [471/3236], Loss: 3.0605, Perplexity: 21.3381
Epoch [1/3], Step [472/3236], Loss: 3.0815, Perplexity: 21.7916
Epoch [1/3], Step [473/3236], Loss: 3.2175, Perplexity: 24.9653
Epoch [1/3], Step [474/3236], Loss: 3.2421, Perplexity: 25.5875
Epoch [1/3], Step [475/3236], Loss: 3.1007, Perplexity: 22.2140
Epoch [1/3], Step [476/3236], Loss: 3.3879, Perplexity: 29.6044
Epoch [1/3], Step [477/3236], Loss: 2.9501, Perplexity: 19.1076
Epoch [1/3], Step [478/3236], Loss: 3.1079, Perplexity: 22.3749
Epoch [1/3], Step [479/3236], Loss: 3.1732, Perplexity: 23.8836
Epoch [1/3], Step [480/3236], Loss: 3.1350, Perplexity: 22.9894
Epoch [1/3], Step [481/3236], Loss: 3.2022, Perplexity: 24.5860
Epoch [1/3], Step [482/3236], Loss: 3.3291, Perplexity: 27.9133
Epoch [1/3], Step [483/3236], Loss: 2.8677, Perplexity: 17.5971
Epoch [1/3], Step [484/3236], Loss: 3.3279, Perplexity: 27.8808
Epoch [1/3], Step [485/3236], Loss: 2.9924, Perplexity: 19.9328
Epoch [1/3], Step [486/3236], Loss: 3.0859, Perplexity: 21.8870
Epoch [1/3], Step [487/3236], Loss: 3.0589, Perplexity: 21.3048
Epoch [1/3], Step [488/3236], Loss: 2.9630, Perplexity: 19.3569
Epoch [1/3], Step [489/3236], Loss: 2.9618, Perplexity: 19.3334
Epoch [1/3], Step [490/3236], Loss: 3.1235, Perplexity: 22.7253
Epoch [1/3], Step [491/3236], Loss: 3.5390, Perplexity: 34.4331
Epoch [1/3], Step [492/3236], Loss: 3.1626, Perplexity: 23.6322
Epoch [1/3], Step [493/3236], Loss: 3.2692, Perplexity: 26.2893
Epoch [1/3], Step [494/3236], Loss: 2.9986, Perplexity: 20.0571
Epoch [1/3], Step [495/3236], Loss: 3.2133, Perplexity: 24.8621
Epoch [1/3], Step [496/3236], Loss: 3.3515, Perplexity: 28.5460
Epoch [1/3], Step [497/3236], Loss: 3.0910, Perplexity: 21.9994
Epoch [1/3], Step [498/3236], Loss: 3.2294, Perplexity: 25.2644
Epoch [1/3], Step [499/3236], Loss: 3.1151, Perplexity: 22.5359
Epoch [1/3], Step [500/3236], Loss: 2.9885, Perplexity: 19.8551
Epoch [1/3], Step [501/3236], Loss: 2.9890, Perplexity: 19.8655
Epoch [1/3], Step [502/3236], Loss: 3.4381, Perplexity: 31.1290
Epoch [1/3], Step [503/3236], Loss: 3.3279, Perplexity: 27.8792
Epoch [1/3], Step [504/3236], Loss: 3.1604, Perplexity: 23.5797
Epoch [1/3], Step [505/3236], Loss: 3.0053, Perplexity: 20.1932
Epoch [1/3], Step [506/3236], Loss: 3.0020, Perplexity: 20.1249
Epoch [1/3], Step [507/3236], Loss: 3.0261, Perplexity: 20.6159
Epoch [1/3], Step [508/3236], Loss: 3.1275, Perplexity: 22.8162
Epoch [1/3], Step [509/3236], Loss: 2.9913, Perplexity: 19.9121
Epoch [1/3], Step [510/3236], Loss: 2.9760, Perplexity: 19.6085
Epoch [1/3], Step [511/3236], Loss: 3.2600, Perplexity: 26.0483
Epoch [1/3], Step [512/3236], Loss: 3.1213, Perplexity: 22.6764
Epoch [1/3], Step [513/3236], Loss: 2.9448, Perplexity: 19.0066
Epoch [1/3], Step [514/3236], Loss: 3.0341, Perplexity: 20.7817
Epoch [1/3], Step [515/3236], Loss: 3.2899, Perplexity: 26.8401
Epoch [1/3], Step [516/3236], Loss: 3.2357, Perplexity: 25.4253
Epoch [1/3], Step [517/3236], Loss: 3.9203, Perplexity: 50.4148
Epoch [1/3], Step [518/3236], Loss: 3.1222, Perplexity: 22.6968
Epoch [1/3], Step [519/3236], Loss: 3.0649, Perplexity: 21.4320
Epoch [1/3], Step [520/3236], Loss: 2.9382, Perplexity: 18.8810
Epoch [1/3], Step [521/3236], Loss: 2.9048, Perplexity: 18.2618
Epoch [1/3], Step [522/3236], Loss: 3.1556, Perplexity: 23.4661
Epoch [1/3], Step [523/3236], Loss: 3.0275, Perplexity: 20.6450
Epoch [1/3], Step [524/3236], Loss: 3.9120, Perplexity: 49.9966
Epoch [1/3], Step [525/3236], Loss: 3.0742, Perplexity: 21.6331
Epoch [1/3], Step [526/3236], Loss: 3.1275, Perplexity: 22.8172
Epoch [1/3], Step [527/3236], Loss: 3.0786, Perplexity: 21.7284
Epoch [1/3], Step [528/3236], Loss: 3.1094, Perplexity: 22.4069
Epoch [1/3], Step [529/3236], Loss: 2.8601, Perplexity: 17.4631
Epoch [1/3], Step [530/3236], Loss: 3.1640, Perplexity: 23.6644
Epoch [1/3], Step [531/3236], Loss: 3.0955, Perplexity: 22.0993
Epoch [1/3], Step [532/3236], Loss: 3.0381, Perplexity: 20.8658
Epoch [1/3], Step [533/3236], Loss: 2.8767, Perplexity: 17.7561
Epoch [1/3], Step [534/3236], Loss: 3.0381, Perplexity: 20.8647
Epoch [1/3], Step [535/3236], Loss: 2.8869, Perplexity: 17.9371
Epoch [1/3], Step [536/3236], Loss: 3.1455, Perplexity: 23.2316
Epoch [1/3], Step [537/3236], Loss: 3.0091, Perplexity: 20.2685
Epoch [1/3], Step [538/3236], Loss: 3.0695, Perplexity: 21.5320
Epoch [1/3], Step [539/3236], Loss: 3.0997, Perplexity: 22.1912
Epoch [1/3], Step [540/3236], Loss: 3.0416, Perplexity: 20.9384
Epoch [1/3], Step [541/3236], Loss: 3.0235, Perplexity: 20.5635
Epoch [1/3], Step [542/3236], Loss: 2.9669, Perplexity: 19.4310
Epoch [1/3], Step [543/3236], Loss: 2.9589, Perplexity: 19.2769
Epoch [1/3], Step [544/3236], Loss: 3.5408, Perplexity: 34.4958
Epoch [1/3], Step [545/3236], Loss: 2.8109, Perplexity: 16.6254
Epoch [1/3], Step [546/3236], Loss: 3.4533, Perplexity: 31.6060
Epoch [1/3], Step [547/3236], Loss: 2.9369, Perplexity: 18.8579
Epoch [1/3], Step [548/3236], Loss: 2.8957, Perplexity: 18.0953
Epoch [1/3], Step [549/3236], Loss: 3.1468, Perplexity: 23.2616
Epoch [1/3], Step [550/3236], Loss: 3.2036, Perplexity: 24.6208
Epoch [1/3], Step [551/3236], Loss: 3.0223, Perplexity: 20.5383
Epoch [1/3], Step [552/3236], Loss: 3.1834, Perplexity: 24.1278
Epoch [1/3], Step [553/3236], Loss: 3.0233, Perplexity: 20.5599
Epoch [1/3], Step [554/3236], Loss: 2.9568, Perplexity: 19.2370
Epoch [1/3], Step [555/3236], Loss: 3.0089, Perplexity: 20.2644
Epoch [1/3], Step [556/3236], Loss: 3.2934, Perplexity: 26.9330
Epoch [1/3], Step [557/3236], Loss: 2.8735, Perplexity: 17.6981
Epoch [1/3], Step [558/3236], Loss: 3.2545, Perplexity: 25.9068
Epoch [1/3], Step [559/3236], Loss: 3.0050, Perplexity: 20.1862
Epoch [1/3], Step [560/3236], Loss: 4.1281, Perplexity: 62.0592
Epoch [1/3], Step [561/3236], Loss: 2.7944, Perplexity: 16.3523
Epoch [1/3], Step [562/3236], Loss: 3.5659, Perplexity: 35.3721
Epoch [1/3], Step [563/3236], Loss: 2.8977, Perplexity: 18.1321
Epoch [1/3], Step [564/3236], Loss: 2.8131, Perplexity: 16.6610
Epoch [1/3], Step [565/3236], Loss: 3.2383, Perplexity: 25.4892
Epoch [1/3], Step [566/3236], Loss: 3.0320, Perplexity: 20.7377
Epoch [1/3], Step [567/3236], Loss: 3.0220, Perplexity: 20.5327
Epoch [1/3], Step [568/3236], Loss: 2.8569, Perplexity: 17.4076
Epoch [1/3], Step [569/3236], Loss: 3.1077, Perplexity: 22.3698
Epoch [1/3], Step [570/3236], Loss: 2.9023, Perplexity: 18.2163
Epoch [1/3], Step [571/3236], Loss: 3.1340, Perplexity: 22.9656
Epoch [1/3], Step [572/3236], Loss: 2.8252, Perplexity: 16.8640
Epoch [1/3], Step [573/3236], Loss: 3.1192, Perplexity: 22.6292
Epoch [1/3], Step [574/3236], Loss: 3.0837, Perplexity: 21.8395
Epoch [1/3], Step [575/3236], Loss: 3.5916, Perplexity: 36.2903
Epoch [1/3], Step [576/3236], Loss: 3.1552, Perplexity: 23.4583
Epoch [1/3], Step [577/3236], Loss: 3.0023, Perplexity: 20.1322
Epoch [1/3], Step [578/3236], Loss: 3.0774, Perplexity: 21.7027
Epoch [1/3], Step [579/3236], Loss: 2.9280, Perplexity: 18.6896
Epoch [1/3], Step [580/3236], Loss: 3.0049, Perplexity: 20.1838
Epoch [1/3], Step [581/3236], Loss: 3.2909, Perplexity: 26.8683
Epoch [1/3], Step [582/3236], Loss: 3.0178, Perplexity: 20.4467
Epoch [1/3], Step [583/3236], Loss: 2.9868, Perplexity: 19.8216
Epoch [1/3], Step [584/3236], Loss: 3.1249, Perplexity: 22.7577
Epoch [1/3], Step [585/3236], Loss: 3.0071, Perplexity: 20.2281
Epoch [1/3], Step [586/3236], Loss: 2.9826, Perplexity: 19.7388
Epoch [1/3], Step [587/3236], Loss: 3.0678, Perplexity: 21.4954
Epoch [1/3], Step [588/3236], Loss: 3.0665, Perplexity: 21.4658
Epoch [1/3], Step [589/3236], Loss: 2.7551, Perplexity: 15.7225
Epoch [1/3], Step [590/3236], Loss: 2.9974, Perplexity: 20.0334
Epoch [1/3], Step [591/3236], Loss: 2.8889, Perplexity: 17.9729
Epoch [1/3], Step [592/3236], Loss: 3.0494, Perplexity: 21.1027
Epoch [1/3], Step [593/3236], Loss: 2.8924, Perplexity: 18.0369
Epoch [1/3], Step [594/3236], Loss: 3.0008, Perplexity: 20.1009
Epoch [1/3], Step [595/3236], Loss: 2.9333, Perplexity: 18.7888
Epoch [1/3], Step [596/3236], Loss: 2.9468, Perplexity: 19.0440
Epoch [1/3], Step [597/3236], Loss: 3.1158, Perplexity: 22.5521
Epoch [1/3], Step [598/3236], Loss: 3.0798, Perplexity: 21.7536
Epoch [1/3], Step [599/3236], Loss: 3.0428, Perplexity: 20.9641
Epoch [1/3], Step [600/3236], Loss: 3.4334, Perplexity: 30.9808
Epoch [1/3], Step [601/3236], Loss: 2.9844, Perplexity: 19.7756
Epoch [1/3], Step [602/3236], Loss: 3.3333, Perplexity: 28.0301
Epoch [1/3], Step [603/3236], Loss: 2.9104, Perplexity: 18.3639
Epoch [1/3], Step [604/3236], Loss: 2.8089, Perplexity: 16.5908
Epoch [1/3], Step [605/3236], Loss: 2.9325, Perplexity: 18.7747
Epoch [1/3], Step [606/3236], Loss: 2.6438, Perplexity: 14.0661
Epoch [1/3], Step [607/3236], Loss: 2.8879, Perplexity: 17.9547
Epoch [1/3], Step [608/3236], Loss: 3.6907, Perplexity: 40.0718
Epoch [1/3], Step [609/3236], Loss: 2.9514, Perplexity: 19.1320
Epoch [1/3], Step [610/3236], Loss: 3.2335, Perplexity: 25.3683
Epoch [1/3], Step [611/3236], Loss: 2.9268, Perplexity: 18.6683
Epoch [1/3], Step [612/3236], Loss: 2.9810, Perplexity: 19.7072
Epoch [1/3], Step [613/3236], Loss: 2.8173, Perplexity: 16.7320
Epoch [1/3], Step [614/3236], Loss: 3.0647, Perplexity: 21.4285
Epoch [1/3], Step [615/3236], Loss: 3.3390, Perplexity: 28.1917
Epoch [1/3], Step [616/3236], Loss: 3.0478, Perplexity: 21.0689
Epoch [1/3], Step [617/3236], Loss: 3.5557, Perplexity: 35.0121
Epoch [1/3], Step [618/3236], Loss: 2.9994, Perplexity: 20.0734
Epoch [1/3], Step [619/3236], Loss: 3.4499, Perplexity: 31.4962
Epoch [1/3], Step [620/3236], Loss: 3.0856, Perplexity: 21.8808
Epoch [1/3], Step [621/3236], Loss: 3.1264, Perplexity: 22.7926
Epoch [1/3], Step [622/3236], Loss: 3.1785, Perplexity: 24.0097
Epoch [1/3], Step [623/3236], Loss: 3.2422, Perplexity: 25.5888
Epoch [1/3], Step [624/3236], Loss: 2.8827, Perplexity: 17.8618
Epoch [1/3], Step [625/3236], Loss: 3.0634, Perplexity: 21.3998
Epoch [1/3], Step [626/3236], Loss: 3.2150, Perplexity: 24.9030
Epoch [1/3], Step [627/3236], Loss: 3.2357, Perplexity: 25.4236
Epoch [1/3], Step [628/3236], Loss: 3.0666, Perplexity: 21.4688
Epoch [1/3], Step [629/3236], Loss: 3.4814, Perplexity: 32.5061
Epoch [1/3], Step [630/3236], Loss: 2.9719, Perplexity: 19.5285
Epoch [1/3], Step [631/3236], Loss: 3.0062, Perplexity: 20.2102
Epoch [1/3], Step [632/3236], Loss: 3.1060, Perplexity: 22.3323
Epoch [1/3], Step [633/3236], Loss: 3.2881, Perplexity: 26.7921
Epoch [1/3], Step [634/3236], Loss: 3.0194, Perplexity: 20.4795
Epoch [1/3], Step [635/3236], Loss: 3.0102, Perplexity: 20.2910
Epoch [1/3], Step [636/3236], Loss: 2.9658, Perplexity: 19.4111
Epoch [1/3], Step [637/3236], Loss: 2.8641, Perplexity: 17.5326
Epoch [1/3], Step [638/3236], Loss: 2.9813, Perplexity: 19.7138
Epoch [1/3], Step [639/3236], Loss: 3.0147, Perplexity: 20.3821
Epoch [1/3], Step [640/3236], Loss: 3.0401, Perplexity: 20.9081
Epoch [1/3], Step [641/3236], Loss: 3.3188, Perplexity: 27.6259
Epoch [1/3], Step [642/3236], Loss: 2.8626, Perplexity: 17.5078
Epoch [1/3], Step [643/3236], Loss: 2.9795, Perplexity: 19.6778
Epoch [1/3], Step [644/3236], Loss: 3.0151, Perplexity: 20.3910
Epoch [1/3], Step [645/3236], Loss: 3.0133, Perplexity: 20.3551
Epoch [1/3], Step [646/3236], Loss: 3.0329, Perplexity: 20.7575
Epoch [1/3], Step [647/3236], Loss: 2.8096, Perplexity: 16.6036
Epoch [1/3], Step [648/3236], Loss: 2.9128, Perplexity: 18.4079
Epoch [1/3], Step [649/3236], Loss: 3.0306, Perplexity: 20.7094
Epoch [1/3], Step [650/3236], Loss: 2.8431, Perplexity: 17.1685
Epoch [1/3], Step [651/3236], Loss: 2.9709, Perplexity: 19.5090
Epoch [1/3], Step [652/3236], Loss: 2.9102, Perplexity: 18.3604
Epoch [1/3], Step [653/3236], Loss: 3.4424, Perplexity: 31.2633
Epoch [1/3], Step [654/3236], Loss: 2.9805, Perplexity: 19.6976
Epoch [1/3], Step [655/3236], Loss: 2.9707, Perplexity: 19.5057
Epoch [1/3], Step [656/3236], Loss: 3.2333, Perplexity: 25.3635
Epoch [1/3], Step [657/3236], Loss: 3.0122, Perplexity: 20.3330
Epoch [1/3], Step [658/3236], Loss: 3.1044, Perplexity: 22.2968
Epoch [1/3], Step [659/3236], Loss: 3.1941, Perplexity: 24.3873
Epoch [1/3], Step [660/3236], Loss: 2.8938, Perplexity: 18.0621
Epoch [1/3], Step [661/3236], Loss: 3.2486, Perplexity: 25.7543
Epoch [1/3], Step [662/3236], Loss: 3.6519, Perplexity: 38.5489
Epoch [1/3], Step [663/3236], Loss: 2.9427, Perplexity: 18.9665
Epoch [1/3], Step [664/3236], Loss: 2.8854, Perplexity: 17.9114
Epoch [1/3], Step [665/3236], Loss: 3.7665, Perplexity: 43.2283
Epoch [1/3], Step [666/3236], Loss: 2.9400, Perplexity: 18.9160
Epoch [1/3], Step [667/3236], Loss: 2.8787, Perplexity: 17.7910
Epoch [1/3], Step [668/3236], Loss: 2.9994, Perplexity: 20.0727
Epoch [1/3], Step [669/3236], Loss: 2.8281, Perplexity: 16.9137
Epoch [1/3], Step [670/3236], Loss: 2.8538, Perplexity: 17.3540
Epoch [1/3], Step [671/3236], Loss: 3.1478, Perplexity: 23.2859
Epoch [1/3], Step [672/3236], Loss: 3.0667, Perplexity: 21.4710
Epoch [1/3], Step [673/3236], Loss: 3.0821, Perplexity: 21.8034
Epoch [1/3], Step [674/3236], Loss: 3.0650, Perplexity: 21.4355
Epoch [1/3], Step [675/3236], Loss: 2.9674, Perplexity: 19.4407
Epoch [1/3], Step [676/3236], Loss: 2.9773, Perplexity: 19.6354
Epoch [1/3], Step [677/3236], Loss: 2.8304, Perplexity: 16.9524
Epoch [1/3], Step [678/3236], Loss: 2.9158, Perplexity: 18.4644
Epoch [1/3], Step [679/3236], Loss: 2.6886, Perplexity: 14.7107
Epoch [1/3], Step [680/3236], Loss: 2.8394, Perplexity: 17.1051
Epoch [1/3], Step [681/3236], Loss: 2.9582, Perplexity: 19.2632
Epoch [1/3], Step [682/3236], Loss: 2.8847, Perplexity: 17.8988
Epoch [1/3], Step [683/3236], Loss: 2.8860, Perplexity: 17.9223
Epoch [1/3], Step [684/3236], Loss: 3.1201, Perplexity: 22.6493
Epoch [1/3], Step [685/3236], Loss: 2.9200, Perplexity: 18.5417
Epoch [1/3], Step [686/3236], Loss: 2.9877, Perplexity: 19.8395
Epoch [1/3], Step [687/3236], Loss: 2.8934, Perplexity: 18.0549
Epoch [1/3], Step [688/3236], Loss: 3.1053, Perplexity: 22.3151
Epoch [1/3], Step [689/3236], Loss: 2.7365, Perplexity: 15.4332
Epoch [1/3], Step [690/3236], Loss: 3.7845, Perplexity: 44.0115
Epoch [1/3], Step [691/3236], Loss: 2.9026, Perplexity: 18.2217
Epoch [1/3], Step [692/3236], Loss: 2.8648, Perplexity: 17.5450
Epoch [1/3], Step [693/3236], Loss: 2.8528, Perplexity: 17.3366
Epoch [1/3], Step [694/3236], Loss: 3.0784, Perplexity: 21.7236
Epoch [1/3], Step [695/3236], Loss: 2.8322, Perplexity: 16.9824
Epoch [1/3], Step [696/3236], Loss: 2.9726, Perplexity: 19.5427
Epoch [1/3], Step [697/3236], Loss: 2.9949, Perplexity: 19.9832
Epoch [1/3], Step [698/3236], Loss: 3.0182, Perplexity: 20.4551
Epoch [1/3], Step [699/3236], Loss: 2.9871, Perplexity: 19.8272
Epoch [1/3], Step [700/3236], Loss: 3.0217, Perplexity: 20.5255
Epoch [1/3], Step [701/3236], Loss: 2.8337, Perplexity: 17.0084
Epoch [1/3], Step [702/3236], Loss: 3.1129, Perplexity: 22.4858
Epoch [1/3], Step [703/3236], Loss: 2.8002, Perplexity: 16.4479
Epoch [1/3], Step [704/3236], Loss: 2.9070, Perplexity: 18.3020
Epoch [1/3], Step [705/3236], Loss: 2.7436, Perplexity: 15.5427
Epoch [1/3], Step [706/3236], Loss: 3.3622, Perplexity: 28.8534
Epoch [1/3], Step [707/3236], Loss: 2.7377, Perplexity: 15.4516
Epoch [1/3], Step [708/3236], Loss: 2.8651, Perplexity: 17.5508
Epoch [1/3], Step [709/3236], Loss: 2.8832, Perplexity: 17.8716
Epoch [1/3], Step [710/3236], Loss: 3.1702, Perplexity: 23.8128
Epoch [1/3], Step [711/3236], Loss: 2.9184, Perplexity: 18.5111
Epoch [1/3], Step [712/3236], Loss: 3.2056, Perplexity: 24.6714
Epoch [1/3], Step [713/3236], Loss: 2.8647, Perplexity: 17.5444
Epoch [1/3], Step [714/3236], Loss: 3.0800, Perplexity: 21.7575
Epoch [1/3], Step [715/3236], Loss: 2.9860, Perplexity: 19.8062
Epoch [1/3], Step [716/3236], Loss: 3.2609, Perplexity: 26.0718
Epoch [1/3], Step [717/3236], Loss: 2.8915, Perplexity: 18.0211
Epoch [1/3], Step [718/3236], Loss: 2.7804, Perplexity: 16.1249
Epoch [1/3], Step [719/3236], Loss: 2.8816, Perplexity: 17.8422
Epoch [1/3], Step [720/3236], Loss: 2.5759, Perplexity: 13.1435
Epoch [1/3], Step [721/3236], Loss: 2.8522, Perplexity: 17.3257
Epoch [1/3], Step [722/3236], Loss: 2.6952, Perplexity: 14.8081
Epoch [1/3], Step [723/3236], Loss: 2.8874, Perplexity: 17.9466
Epoch [1/3], Step [724/3236], Loss: 3.1312, Perplexity: 22.9020
Epoch [1/3], Step [725/3236], Loss: 2.7569, Perplexity: 15.7512
Epoch [1/3], Step [726/3236], Loss: 2.8532, Perplexity: 17.3425
Epoch [1/3], Step [727/3236], Loss: 2.8754, Perplexity: 17.7323
Epoch [1/3], Step [728/3236], Loss: 2.8815, Perplexity: 17.8414
Epoch [1/3], Step [729/3236], Loss: 2.6185, Perplexity: 13.7156
Epoch [1/3], Step [730/3236], Loss: 2.7917, Perplexity: 16.3092
Epoch [1/3], Step [731/3236], Loss: 2.9114, Perplexity: 18.3823
Epoch [1/3], Step [732/3236], Loss: 2.9020, Perplexity: 18.2110
Epoch [1/3], Step [733/3236], Loss: 3.0667, Perplexity: 21.4716
Epoch [1/3], Step [734/3236], Loss: 3.0283, Perplexity: 20.6616
Epoch [1/3], Step [735/3236], Loss: 3.3465, Perplexity: 28.4037
Epoch [1/3], Step [736/3236], Loss: 2.7320, Perplexity: 15.3629
Epoch [1/3], Step [737/3236], Loss: 2.9496, Perplexity: 19.0985
Epoch [1/3], Step [738/3236], Loss: 2.8309, Perplexity: 16.9602
Epoch [1/3], Step [739/3236], Loss: 2.9157, Perplexity: 18.4622
Epoch [1/3], Step [740/3236], Loss: 3.2892, Perplexity: 26.8209
Epoch [1/3], Step [741/3236], Loss: 2.8166, Perplexity: 16.7197
Epoch [1/3], Step [742/3236], Loss: 3.0774, Perplexity: 21.7020
Epoch [1/3], Step [743/3236], Loss: 3.2096, Perplexity: 24.7699
Epoch [1/3], Step [744/3236], Loss: 3.2951, Perplexity: 26.9812
Epoch [1/3], Step [745/3236], Loss: 2.8587, Perplexity: 17.4384
Epoch [1/3], Step [746/3236], Loss: 3.1125, Perplexity: 22.4761
Epoch [1/3], Step [747/3236], Loss: 2.8654, Perplexity: 17.5554
Epoch [1/3], Step [748/3236], Loss: 2.8876, Perplexity: 17.9499
Epoch [1/3], Step [749/3236], Loss: 2.8558, Perplexity: 17.3878
Epoch [1/3], Step [750/3236], Loss: 2.7952, Perplexity: 16.3659
Epoch [1/3], Step [751/3236], Loss: 2.9779, Perplexity: 19.6462
Epoch [1/3], Step [752/3236], Loss: 2.7929, Perplexity: 16.3291
Epoch [1/3], Step [753/3236], Loss: 3.2362, Perplexity: 25.4372
Epoch [1/3], Step [754/3236], Loss: 2.6972, Perplexity: 14.8386
Epoch [1/3], Step [755/3236], Loss: 2.8370, Perplexity: 17.0645
Epoch [1/3], Step [756/3236], Loss: 3.0466, Perplexity: 21.0433
Epoch [1/3], Step [757/3236], Loss: 3.0188, Perplexity: 20.4662
Epoch [1/3], Step [758/3236], Loss: 2.8614, Perplexity: 17.4864
Epoch [1/3], Step [759/3236], Loss: 2.9053, Perplexity: 18.2711
Epoch [1/3], Step [760/3236], Loss: 2.7732, Perplexity: 16.0106
Epoch [1/3], Step [761/3236], Loss: 2.8456, Perplexity: 17.2114
Epoch [1/3], Step [762/3236], Loss: 2.9437, Perplexity: 18.9868
Epoch [1/3], Step [763/3236], Loss: 2.8160, Perplexity: 16.7091
Epoch [1/3], Step [764/3236], Loss: 2.7582, Perplexity: 15.7711
Epoch [1/3], Step [765/3236], Loss: 2.8130, Perplexity: 16.6595
Epoch [1/3], Step [766/3236], Loss: 2.7884, Perplexity: 16.2547
Epoch [1/3], Step [767/3236], Loss: 2.7506, Perplexity: 15.6517
Epoch [1/3], Step [768/3236], Loss: 2.7973, Perplexity: 16.4002
Epoch [1/3], Step [769/3236], Loss: 2.8793, Perplexity: 17.8013
Epoch [1/3], Step [770/3236], Loss: 2.9800, Perplexity: 19.6884
Epoch [1/3], Step [771/3236], Loss: 2.7052, Perplexity: 14.9571
Epoch [1/3], Step [772/3236], Loss: 2.8774, Perplexity: 17.7679
Epoch [1/3], Step [773/3236], Loss: 2.7005, Perplexity: 14.8866
Epoch [1/3], Step [774/3236], Loss: 2.7952, Perplexity: 16.3665
Epoch [1/3], Step [775/3236], Loss: 2.8126, Perplexity: 16.6523
Epoch [1/3], Step [776/3236], Loss: 2.8077, Perplexity: 16.5724
Epoch [1/3], Step [777/3236], Loss: 2.8769, Perplexity: 17.7586
Epoch [1/3], Step [778/3236], Loss: 2.8200, Perplexity: 16.7770
Epoch [1/3], Step [779/3236], Loss: 2.6814, Perplexity: 14.6050
Epoch [1/3], Step [780/3236], Loss: 3.1490, Perplexity: 23.3136
Epoch [1/3], Step [781/3236], Loss: 2.5938, Perplexity: 13.3799
Epoch [1/3], Step [782/3236], Loss: 2.7908, Perplexity: 16.2942
Epoch [1/3], Step [783/3236], Loss: 2.8472, Perplexity: 17.2393
Epoch [1/3], Step [784/3236], Loss: 2.9147, Perplexity: 18.4429
Epoch [1/3], Step [785/3236], Loss: 2.7867, Perplexity: 16.2281
Epoch [1/3], Step [786/3236], Loss: 2.8382, Perplexity: 17.0847
Epoch [1/3], Step [787/3236], Loss: 2.7561, Perplexity: 15.7388
Epoch [1/3], Step [788/3236], Loss: 3.0030, Perplexity: 20.1453
Epoch [1/3], Step [789/3236], Loss: 2.7982, Perplexity: 16.4155
Epoch [1/3], Step [790/3236], Loss: 3.7346, Perplexity: 41.8719
Epoch [1/3], Step [791/3236], Loss: 2.8019, Perplexity: 16.4756
Epoch [1/3], Step [792/3236], Loss: 2.6922, Perplexity: 14.7641
Epoch [1/3], Step [793/3236], Loss: 2.9362, Perplexity: 18.8449
Epoch [1/3], Step [794/3236], Loss: 2.8707, Perplexity: 17.6502
Epoch [1/3], Step [795/3236], Loss: 2.9171, Perplexity: 18.4882
Epoch [1/3], Step [796/3236], Loss: 2.7108, Perplexity: 15.0409
Epoch [1/3], Step [797/3236], Loss: 2.9720, Perplexity: 19.5309
Epoch [1/3], Step [798/3236], Loss: 2.8097, Perplexity: 16.6056
Epoch [1/3], Step [799/3236], Loss: 2.9914, Perplexity: 19.9139
Epoch [1/3], Step [800/3236], Loss: 3.4460, Perplexity: 31.3745
Epoch [1/3], Step [801/3236], Loss: 2.8354, Perplexity: 17.0371
Epoch [1/3], Step [802/3236], Loss: 3.8516, Perplexity: 47.0683
Epoch [1/3], Step [803/3236], Loss: 2.8393, Perplexity: 17.1033
Epoch [1/3], Step [804/3236], Loss: 2.9585, Perplexity: 19.2694
Epoch [1/3], Step [805/3236], Loss: 2.8007, Perplexity: 16.4555
Epoch [1/3], Step [806/3236], Loss: 2.9421, Perplexity: 18.9550
Epoch [1/3], Step [807/3236], Loss: 2.7587, Perplexity: 15.7792
Epoch [1/3], Step [808/3236], Loss: 2.8495, Perplexity: 17.2789
Epoch [1/3], Step [809/3236], Loss: 2.9405, Perplexity: 18.9257
Epoch [1/3], Step [810/3236], Loss: 2.7043, Perplexity: 14.9435
Epoch [1/3], Step [811/3236], Loss: 2.9957, Perplexity: 19.9993
Epoch [1/3], Step [812/3236], Loss: 2.7584, Perplexity: 15.7743
Epoch [1/3], Step [813/3236], Loss: 2.6503, Perplexity: 14.1582
Epoch [1/3], Step [814/3236], Loss: 2.8129, Perplexity: 16.6590
Epoch [1/3], Step [815/3236], Loss: 2.9199, Perplexity: 18.5394
Epoch [1/3], Step [816/3236], Loss: 2.6886, Perplexity: 14.7115
Epoch [1/3], Step [817/3236], Loss: 2.8255, Perplexity: 16.8691
Epoch [1/3], Step [818/3236], Loss: 3.2042, Perplexity: 24.6368
Epoch [1/3], Step [819/3236], Loss: 2.7061, Perplexity: 14.9715
Epoch [1/3], Step [820/3236], Loss: 2.9081, Perplexity: 18.3215
Epoch [1/3], Step [821/3236], Loss: 2.7864, Perplexity: 16.2232
Epoch [1/3], Step [822/3236], Loss: 2.9376, Perplexity: 18.8706
Epoch [1/3], Step [823/3236], Loss: 2.7820, Perplexity: 16.1508
Epoch [1/3], Step [824/3236], Loss: 3.2008, Perplexity: 24.5525
Epoch [1/3], Step [825/3236], Loss: 2.8232, Perplexity: 16.8301
Epoch [1/3], Step [826/3236], Loss: 2.6852, Perplexity: 14.6613
Epoch [1/3], Step [827/3236], Loss: 2.7927, Perplexity: 16.3244
Epoch [1/3], Step [828/3236], Loss: 2.7304, Perplexity: 15.3386
Epoch [1/3], Step [829/3236], Loss: 3.1283, Perplexity: 22.8345
Epoch [1/3], Step [830/3236], Loss: 2.8350, Perplexity: 17.0299
Epoch [1/3], Step [831/3236], Loss: 2.9350, Perplexity: 18.8208
Epoch [1/3], Step [832/3236], Loss: 3.4094, Perplexity: 30.2475
Epoch [1/3], Step [833/3236], Loss: 2.6525, Perplexity: 14.1891
Epoch [1/3], Step [834/3236], Loss: 2.9953, Perplexity: 19.9909
Epoch [1/3], Step [835/3236], Loss: 3.0369, Perplexity: 20.8407
Epoch [1/3], Step [836/3236], Loss: 2.8906, Perplexity: 18.0038
Epoch [1/3], Step [837/3236], Loss: 2.9593, Perplexity: 19.2848
Epoch [1/3], Step [838/3236], Loss: 3.1723, Perplexity: 23.8614
Epoch [1/3], Step [839/3236], Loss: 2.6558, Perplexity: 14.2370
Epoch [1/3], Step [840/3236], Loss: 2.9573, Perplexity: 19.2459
Epoch [1/3], Step [841/3236], Loss: 2.7959, Perplexity: 16.3778
Epoch [1/3], Step [842/3236], Loss: 2.8474, Perplexity: 17.2429
Epoch [1/3], Step [843/3236], Loss: 2.9036, Perplexity: 18.2401
Epoch [1/3], Step [844/3236], Loss: 2.6988, Perplexity: 14.8617
Epoch [1/3], Step [845/3236], Loss: 2.7209, Perplexity: 15.1933
Epoch [1/3], Step [846/3236], Loss: 2.7797, Perplexity: 16.1143
Epoch [1/3], Step [847/3236], Loss: 2.5914, Perplexity: 13.3485
Epoch [1/3], Step [848/3236], Loss: 2.7366, Perplexity: 15.4350
Epoch [1/3], Step [849/3236], Loss: 2.9229, Perplexity: 18.5951
Epoch [1/3], Step [850/3236], Loss: 2.7428, Perplexity: 15.5299
Epoch [1/3], Step [851/3236], Loss: 2.6923, Perplexity: 14.7656
Epoch [1/3], Step [852/3236], Loss: 2.6822, Perplexity: 14.6179
Epoch [1/3], Step [853/3236], Loss: 2.8745, Perplexity: 17.7170
Epoch [1/3], Step [854/3236], Loss: 2.8339, Perplexity: 17.0124
Epoch [1/3], Step [855/3236], Loss: 2.8170, Perplexity: 16.7273
Epoch [1/3], Step [856/3236], Loss: 2.6425, Perplexity: 14.0480
Epoch [1/3], Step [857/3236], Loss: 2.6589, Perplexity: 14.2806
Epoch [1/3], Step [858/3236], Loss: 2.6965, Perplexity: 14.8273
Epoch [1/3], Step [859/3236], Loss: 2.7729, Perplexity: 16.0046
Epoch [1/3], Step [860/3236], Loss: 2.7460, Perplexity: 15.5800
Epoch [1/3], Step [861/3236], Loss: 2.9927, Perplexity: 19.9395
Epoch [1/3], Step [862/3236], Loss: 2.7531, Perplexity: 15.6913
Epoch [1/3], Step [863/3236], Loss: 3.6542, Perplexity: 38.6354
Epoch [1/3], Step [864/3236], Loss: 2.7441, Perplexity: 15.5511
Epoch [1/3], Step [865/3236], Loss: 2.8720, Perplexity: 17.6723
Epoch [1/3], Step [866/3236], Loss: 2.9451, Perplexity: 19.0121
Epoch [1/3], Step [867/3236], Loss: 2.8268, Perplexity: 16.8920
Epoch [1/3], Step [868/3236], Loss: 2.8857, Perplexity: 17.9153
Epoch [1/3], Step [869/3236], Loss: 2.7509, Perplexity: 15.6569
Epoch [1/3], Step [870/3236], Loss: 2.7079, Perplexity: 14.9982
Epoch [1/3], Step [871/3236], Loss: 2.6553, Perplexity: 14.2290
Epoch [1/3], Step [872/3236], Loss: 2.6158, Perplexity: 13.6779
Epoch [1/3], Step [873/3236], Loss: 3.0600, Perplexity: 21.3269
Epoch [1/3], Step [874/3236], Loss: 2.9437, Perplexity: 18.9865
Epoch [1/3], Step [875/3236], Loss: 2.5929, Perplexity: 13.3688
Epoch [1/3], Step [876/3236], Loss: 2.5483, Perplexity: 12.7847
Epoch [1/3], Step [877/3236], Loss: 2.7057, Perplexity: 14.9645
Epoch [1/3], Step [878/3236], Loss: 2.9063, Perplexity: 18.2888
Epoch [1/3], Step [879/3236], Loss: 2.6345, Perplexity: 13.9364
Epoch [1/3], Step [880/3236], Loss: 3.0275, Perplexity: 20.6450
Epoch [1/3], Step [881/3236], Loss: 2.6879, Perplexity: 14.7001
Epoch [1/3], Step [882/3236], Loss: 2.6297, Perplexity: 13.8694
Epoch [1/3], Step [883/3236], Loss: 2.9339, Perplexity: 18.8011
Epoch [1/3], Step [884/3236], Loss: 2.7206, Perplexity: 15.1902
Epoch [1/3], Step [885/3236], Loss: 2.9351, Perplexity: 18.8235
Epoch [1/3], Step [886/3236], Loss: 3.1631, Perplexity: 23.6426
Epoch [1/3], Step [887/3236], Loss: 2.7594, Perplexity: 15.7907
Epoch [1/3], Step [888/3236], Loss: 2.6755, Perplexity: 14.5196
Epoch [1/3], Step [889/3236], Loss: 2.7852, Perplexity: 16.2027
Epoch [1/3], Step [890/3236], Loss: 2.8985, Perplexity: 18.1478
Epoch [1/3], Step [891/3236], Loss: 2.5813, Perplexity: 13.2142
Epoch [1/3], Step [892/3236], Loss: 2.7128, Perplexity: 15.0712
Epoch [1/3], Step [893/3236], Loss: 2.7591, Perplexity: 15.7852
Epoch [1/3], Step [894/3236], Loss: 2.7094, Perplexity: 15.0200
Epoch [1/3], Step [895/3236], Loss: 2.8054, Perplexity: 16.5329
Epoch [1/3], Step [896/3236], Loss: 2.6428, Perplexity: 14.0531
Epoch [1/3], Step [897/3236], Loss: 2.8414, Perplexity: 17.1399
Epoch [1/3], Step [898/3236], Loss: 2.7542, Perplexity: 15.7078
Epoch [1/3], Step [899/3236], Loss: 2.8806, Perplexity: 17.8248
Epoch [1/3], Step [900/3236], Loss: 3.1789, Perplexity: 24.0203
Epoch [1/3], Step [901/3236], Loss: 2.8711, Perplexity: 17.6565
Epoch [1/3], Step [902/3236], Loss: 2.7891, Perplexity: 16.2665
Epoch [1/3], Step [903/3236], Loss: 2.6611, Perplexity: 14.3126
Epoch [1/3], Step [904/3236], Loss: 2.6236, Perplexity: 13.7850
Epoch [1/3], Step [905/3236], Loss: 3.0395, Perplexity: 20.8950
Epoch [1/3], Step [906/3236], Loss: 2.7066, Perplexity: 14.9781
Epoch [1/3], Step [907/3236], Loss: 3.2984, Perplexity: 27.0706
Epoch [1/3], Step [908/3236], Loss: 2.7804, Perplexity: 16.1257
Epoch [1/3], Step [909/3236], Loss: 2.8034, Perplexity: 16.5005
Epoch [1/3], Step [910/3236], Loss: 2.7387, Perplexity: 15.4666
Epoch [1/3], Step [911/3236], Loss: 3.1512, Perplexity: 23.3634
Epoch [1/3], Step [912/3236], Loss: 2.6574, Perplexity: 14.2597
Epoch [1/3], Step [913/3236], Loss: 2.8272, Perplexity: 16.8988
Epoch [1/3], Step [914/3236], Loss: 2.8784, Perplexity: 17.7863
Epoch [1/3], Step [915/3236], Loss: 2.9021, Perplexity: 18.2124
Epoch [1/3], Step [916/3236], Loss: 2.6719, Perplexity: 14.4680
Epoch [1/3], Step [917/3236], Loss: 2.6910, Perplexity: 14.7466
Epoch [1/3], Step [918/3236], Loss: 2.6315, Perplexity: 13.8939
Epoch [1/3], Step [919/3236], Loss: 2.7486, Perplexity: 15.6205
Epoch [1/3], Step [920/3236], Loss: 2.8519, Perplexity: 17.3203
Epoch [1/3], Step [921/3236], Loss: 2.8599, Perplexity: 17.4591
Epoch [1/3], Step [922/3236], Loss: 2.8785, Perplexity: 17.7877
Epoch [1/3], Step [923/3236], Loss: 2.9287, Perplexity: 18.7025
Epoch [1/3], Step [924/3236], Loss: 2.6594, Perplexity: 14.2876
Epoch [1/3], Step [925/3236], Loss: 3.7789, Perplexity: 43.7696
Epoch [1/3], Step [926/3236], Loss: 2.5910, Perplexity: 13.3435
Epoch [1/3], Step [927/3236], Loss: 2.6072, Perplexity: 13.5607
Epoch [1/3], Step [928/3236], Loss: 2.6752, Perplexity: 14.5147
Epoch [1/3], Step [929/3236], Loss: 2.5252, Perplexity: 12.4934
Epoch [1/3], Step [930/3236], Loss: 2.6529, Perplexity: 14.1949
Epoch [1/3], Step [931/3236], Loss: 2.8349, Perplexity: 17.0282
Epoch [1/3], Step [932/3236], Loss: 2.7502, Perplexity: 15.6465
Epoch [1/3], Step [933/3236], Loss: 3.0746, Perplexity: 21.6414
Epoch [1/3], Step [934/3236], Loss: 2.7649, Perplexity: 15.8770
Epoch [1/3], Step [935/3236], Loss: 3.4656, Perplexity: 31.9955
Epoch [1/3], Step [936/3236], Loss: 2.5006, Perplexity: 12.1902
Epoch [1/3], Step [937/3236], Loss: 2.4600, Perplexity: 11.7051
Epoch [1/3], Step [938/3236], Loss: 2.5935, Perplexity: 13.3763
Epoch [1/3], Step [939/3236], Loss: 2.6653, Perplexity: 14.3729
Epoch [1/3], Step [940/3236], Loss: 2.7608, Perplexity: 15.8125
Epoch [1/3], Step [941/3236], Loss: 2.6175, Perplexity: 13.7008
Epoch [1/3], Step [942/3236], Loss: 2.8000, Perplexity: 16.4447
Epoch [1/3], Step [943/3236], Loss: 2.7286, Perplexity: 15.3110
Epoch [1/3], Step [944/3236], Loss: 2.7130, Perplexity: 15.0740
Epoch [1/3], Step [945/3236], Loss: 2.8561, Perplexity: 17.3937
Epoch [1/3], Step [946/3236], Loss: 2.9260, Perplexity: 18.6525
Epoch [1/3], Step [947/3236], Loss: 2.6846, Perplexity: 14.6517
Epoch [1/3], Step [948/3236], Loss: 2.6614, Perplexity: 14.3161
Epoch [1/3], Step [949/3236], Loss: 2.9372, Perplexity: 18.8637
Epoch [1/3], Step [950/3236], Loss: 2.7220, Perplexity: 15.2105
Epoch [1/3], Step [951/3236], Loss: 3.3506, Perplexity: 28.5194
Epoch [1/3], Step [952/3236], Loss: 2.7961, Perplexity: 16.3808
Epoch [1/3], Step [953/3236], Loss: 2.6636, Perplexity: 14.3485
Epoch [1/3], Step [954/3236], Loss: 3.0255, Perplexity: 20.6039
Epoch [1/3], Step [955/3236], Loss: 2.6536, Perplexity: 14.2046
Epoch [1/3], Step [956/3236], Loss: 2.5110, Perplexity: 12.3178
Epoch [1/3], Step [957/3236], Loss: 2.6999, Perplexity: 14.8775
Epoch [1/3], Step [958/3236], Loss: 2.9999, Perplexity: 20.0834
Epoch [1/3], Step [959/3236], Loss: 2.8498, Perplexity: 17.2837
Epoch [1/3], Step [960/3236], Loss: 2.8087, Perplexity: 16.5876
Epoch [1/3], Step [961/3236], Loss: 2.7229, Perplexity: 15.2241
Epoch [1/3], Step [962/3236], Loss: 2.7603, Perplexity: 15.8045
Epoch [1/3], Step [963/3236], Loss: 3.0488, Perplexity: 21.0900
Epoch [1/3], Step [964/3236], Loss: 2.6631, Perplexity: 14.3411
Epoch [1/3], Step [965/3236], Loss: 2.7867, Perplexity: 16.2273
Epoch [1/3], Step [966/3236], Loss: 2.7191, Perplexity: 15.1663
Epoch [1/3], Step [967/3236], Loss: 2.6535, Perplexity: 14.2044
Epoch [1/3], Step [968/3236], Loss: 2.9303, Perplexity: 18.7330
Epoch [1/3], Step [969/3236], Loss: 2.8735, Perplexity: 17.6984
Epoch [1/3], Step [970/3236], Loss: 2.6653, Perplexity: 14.3719
Epoch [1/3], Step [971/3236], Loss: 2.7148, Perplexity: 15.1010
Epoch [1/3], Step [972/3236], Loss: 2.7747, Perplexity: 16.0340
Epoch [1/3], Step [973/3236], Loss: 2.9097, Perplexity: 18.3505
Epoch [1/3], Step [974/3236], Loss: 2.7824, Perplexity: 16.1583
Epoch [1/3], Step [975/3236], Loss: 3.0247, Perplexity: 20.5875
Epoch [1/3], Step [976/3236], Loss: 2.6954, Perplexity: 14.8114
Epoch [1/3], Step [977/3236], Loss: 2.6822, Perplexity: 14.6172
Epoch [1/3], Step [978/3236], Loss: 2.9095, Perplexity: 18.3485
Epoch [1/3], Step [979/3236], Loss: 2.5461, Perplexity: 12.7569
Epoch [1/3], Step [980/3236], Loss: 2.7461, Perplexity: 15.5812
Epoch [1/3], Step [981/3236], Loss: 2.5722, Perplexity: 13.0952
Epoch [1/3], Step [982/3236], Loss: 2.6912, Perplexity: 14.7490
Epoch [1/3], Step [983/3236], Loss: 2.6845, Perplexity: 14.6504
Epoch [1/3], Step [984/3236], Loss: 2.7498, Perplexity: 15.6390
Epoch [1/3], Step [985/3236], Loss: 2.7639, Perplexity: 15.8615
Epoch [1/3], Step [986/3236], Loss: 2.8038, Perplexity: 16.5073
Epoch [1/3], Step [987/3236], Loss: 2.6141, Perplexity: 13.6556
Epoch [1/3], Step [988/3236], Loss: 2.5119, Perplexity: 12.3280
Epoch [1/3], Step [989/3236], Loss: 2.8617, Perplexity: 17.4912
Epoch [1/3], Step [990/3236], Loss: 2.8344, Perplexity: 17.0205
Epoch [1/3], Step [991/3236], Loss: 2.9171, Perplexity: 18.4872
Epoch [1/3], Step [992/3236], Loss: 2.6937, Perplexity: 14.7856
Epoch [1/3], Step [993/3236], Loss: 2.6077, Perplexity: 13.5673
Epoch [1/3], Step [994/3236], Loss: 3.0408, Perplexity: 20.9223
Epoch [1/3], Step [995/3236], Loss: 2.4954, Perplexity: 12.1270
Epoch [1/3], Step [996/3236], Loss: 2.5250, Perplexity: 12.4914
Epoch [1/3], Step [997/3236], Loss: 2.8896, Perplexity: 17.9861
Epoch [1/3], Step [998/3236], Loss: 2.7712, Perplexity: 15.9779
Epoch [1/3], Step [999/3236], Loss: 2.8441, Perplexity: 17.1861
Epoch [1/3], Step [1000/3236], Loss: 2.7010, Perplexity: 14.8951