-
Notifications
You must be signed in to change notification settings - Fork 0
/
execution.log
1620 lines (1590 loc) · 66.4 KB
/
execution.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
2021-03-08 08:40:08.648464: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-03-08 08:40:08.648519: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Env init done
Env init done
2021-03-08 08:40:11.857681: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-03-08 08:40:11.857788: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-03-08 08:40:11.857821: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (phylo): /proc/driver/nvidia/version does not exist
2021-03-08 08:40:11.859150: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
-0.79 max: [6.6666665]
WARNING:tensorflow:From /home/panna/.local/lib/python3.6/site-packages/tensorflow/python/autograph/impl/api.py:382: ReplayBuffer.get_next (from tf_agents.replay_buffers.replay_buffer) is deprecated and will be removed in a future version.
Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.
2021-03-08 08:40:17.186217: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2021-03-08 08:40:17.188347: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2299995000 Hz
08:40:18.780764
WARNING:tensorflow:From /home/panna/.local/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:206: calling foldr_v2 (from tensorflow.python.ops.functional_ops) with back_prop=False is deprecated and will be removed in a future version.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.foldr(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))
step = 250: loss = 0.05002320557832718
step = 500: loss = 0.34140545129776
step = 750: loss = 0.2333659827709198
step = 1000: loss = 0.4673877954483032
step = 1000: Average Return = 0.5866666436195374 , Max Return = [6.6666665]
step = 1250: loss = 0.2031446099281311
step = 1500: loss = 0.2340516746044159
step = 1750: loss = 0.450368732213974
step = 2000: loss = 1.0548934936523438
step = 2000: Average Return = 1.2333338260650635 , Max Return = [6.6666665]
step = 2250: loss = 0.4321872591972351
step = 2500: loss = 0.6815603971481323
step = 2750: loss = 0.9786102771759033
step = 3000: loss = 1.26641845703125
step = 3000: Average Return = 1.9133334159851074 , Max Return = [6.6666665]
step = 3250: loss = 0.6741860508918762
step = 3500: loss = 0.35717058181762695
step = 3750: loss = 2.096916437149048
step = 4000: loss = 1.169739007949829
step = 4000: Average Return = 1.7133331298828125 , Max Return = [10.]
step = 4250: loss = 0.5211189985275269
step = 4500: loss = 0.49491608142852783
step = 4750: loss = 0.41227030754089355
step = 5000: loss = 0.4588063657283783
step = 5000: Average Return = 1.1333335638046265 , Max Return = [10.]
step = 5250: loss = 0.2242075800895691
step = 5500: loss = 0.20814892649650574
step = 5750: loss = 0.2564168870449066
step = 6000: loss = 0.503656268119812
step = 6000: Average Return = 1.7799994945526123 , Max Return = [10.]
step = 6250: loss = 1.2204421758651733
step = 6500: loss = 0.6546468734741211
step = 6750: loss = 0.654392421245575
step = 7000: loss = 0.06462401896715164
step = 7000: Average Return = 2.4466657638549805 , Max Return = [6.6666665]
step = 7250: loss = 1.2447171211242676
step = 7500: loss = 0.6505082845687866
step = 7750: loss = 0.6289389133453369
step = 8000: loss = 0.7228125929832458
step = 8000: Average Return = 2.6666669845581055 , Max Return = [10.]
step = 8250: loss = 1.0051984786987305
step = 8500: loss = 0.3522628843784332
step = 8750: loss = 0.14667892456054688
step = 9000: loss = 0.35849103331565857
step = 9000: Average Return = 0.8000000715255737 , Max Return = [6.6666665]
step = 9250: loss = 0.7273366451263428
step = 9500: loss = 0.463484525680542
step = 9750: loss = 0.45974236726760864
step = 10000: loss = 0.38805443048477173
step = 10000: Average Return = 2.4666669368743896 , Max Return = [10.]
11:03:05.464798
[-1.0,0.58666664,1.2333338,1.9133334,1.7133331,1.1333336,1.7799995,2.4466658,2.666667,0.8000001,2.46666]
Env init done
Env init done
-0.85 max: [6.6666665]
11:03:11.463457
step = 250: loss = 0.2767345905303955
step = 500: loss = 1.4647908210754395
step = 750: loss = 1.300001621246338
step = 1000: loss = 1.7469286918640137
step = 1000: Average Return = 1.3666669130325317 , Max Return = [10.]
step = 1250: loss = 1.2664101123809814
step = 1500: loss = 0.972542405128479
step = 1750: loss = 0.5776553153991699
step = 2000: loss = 1.1180860996246338
step = 2000: Average Return = 2.299999475479126 , Max Return = [10.]
step = 2250: loss = 1.2556381225585938
step = 2500: loss = 1.5738643407821655
step = 2750: loss = 0.952377200126648
step = 3000: loss = 0.3295539319515228
step = 3000: Average Return = 4.733333587646484 , Max Return = [10.]
step = 3250: loss = 0.4186340868473053
step = 3500: loss = 0.6309987902641296
step = 3750: loss = 1.288682460784912
step = 4000: loss = 1.5911448001861572
step = 4000: Average Return = 4.766667366027832 , Max Return = [10.]
step = 4250: loss = 1.1398624181747437
step = 4500: loss = 1.9380764961242676
step = 4750: loss = 0.7733728885650635
step = 5000: loss = 1.5966073274612427
step = 5000: Average Return = 4.366666793823242 , Max Return = [10.]
step = 5250: loss = 0.9867565631866455
step = 5500: loss = 1.0977829694747925
step = 5750: loss = 0.921516478061676
step = 6000: loss = 2.0307626724243164
step = 6000: Average Return = 4.166666507720947 , Max Return = [10.]
step = 6250: loss = 0.750316321849823
step = 6500: loss = 0.8605191111564636
step = 6750: loss = 0.3590172827243805
step = 7000: loss = 0.3722341060638428
step = 7000: Average Return = 6.0333333015441895 , Max Return = [10.]
step = 7250: loss = 2.1407976150512695
step = 7500: loss = 0.5337360501289368
step = 7750: loss = 1.238670825958252
step = 8000: loss = 0.4027865529060364
step = 8000: Average Return = 5.046667575836182 , Max Return = [10.]
step = 8250: loss = 0.6609500646591187
step = 8500: loss = 1.8798456192016602
step = 8750: loss = 0.4763595461845398
step = 9000: loss = 0.888209879398346
step = 9000: Average Return = 5.69999885559082 , Max Return = [10.]
step = 9250: loss = 1.3424888849258423
step = 9500: loss = 0.8961889743804932
step = 9750: loss = 0.9222241640090942
step = 10000: loss = 1.30999755859375
step = 10000: Average Return = 5.366666793823242 , Max Return = [10.]
13:27:13.533711
[-1.0,1.3666669,2.2999995,4.7333336,4.7666674,4.366667,4.1666665,6.0333333,5.0466676,5.699999,5.36666]
Env init done
Env init done
-0.8933334 max: [3.3333333]
13:27:19.717002
step = 250: loss = 0.24023084342479706
step = 500: loss = 0.16963031888008118
step = 750: loss = 0.2877616882324219
step = 1000: loss = 0.29541581869125366
step = 1000: Average Return = 2.3799996376037598 , Max Return = [10.]
step = 1250: loss = 1.0639243125915527
step = 1500: loss = 1.79276704788208
step = 1750: loss = 0.7998049259185791
step = 2000: loss = 0.7041166424751282
step = 2000: Average Return = 4.000000953674316 , Max Return = [10.]
step = 2250: loss = 2.6689882278442383
step = 2500: loss = 1.5168529748916626
step = 2750: loss = 1.038845419883728
step = 3000: loss = 1.153487205505371
step = 3000: Average Return = 5.4333295822143555 , Max Return = [10.]
step = 3250: loss = 0.9075444936752319
step = 3500: loss = 0.8624371290206909
step = 3750: loss = 1.2587206363677979
step = 4000: loss = 1.3234628438949585
step = 4000: Average Return = 6.266666412353516 , Max Return = [10.]
step = 4250: loss = 0.8783647418022156
step = 4500: loss = 0.6714491844177246
step = 4750: loss = 0.7765015363693237
step = 5000: loss = 0.842681884765625
step = 5000: Average Return = 6.599999904632568 , Max Return = [10.]
step = 5250: loss = 1.39273202419281
step = 5500: loss = 0.4411082863807678
step = 5750: loss = 0.7826348543167114
step = 6000: loss = 0.9544785022735596
step = 6000: Average Return = 6.4666666984558105 , Max Return = [10.]
step = 6250: loss = 1.4188833236694336
step = 6500: loss = 1.5583076477050781
step = 6750: loss = 0.8621350526809692
step = 7000: loss = 2.176117420196533
step = 7000: Average Return = 6.59999942779541 , Max Return = [10.]
step = 7250: loss = 0.6923227310180664
step = 7500: loss = 0.9701268672943115
step = 7750: loss = 0.7935593724250793
step = 8000: loss = 1.0581514835357666
step = 8000: Average Return = 5.333332538604736 , Max Return = [10.]
step = 8250: loss = 0.5569676160812378
step = 8500: loss = 1.258719563484192
step = 8750: loss = 0.819268524646759
step = 9000: loss = 0.9267340302467346
step = 9000: Average Return = 5.679999828338623 , Max Return = [10.]
step = 9250: loss = 1.8362185955047607
step = 9500: loss = 0.8901209831237793
step = 9750: loss = 0.3501448631286621
step = 10000: loss = 0.8270381689071655
step = 10000: Average Return = 5.899998664855957 , Max Return = [10.]
15:52:18.985329
[-1.0,2.3799996,4.000001,5.4333296,6.2666664,6.6,6.4666667,6.5999994,5.3333325,5.68,5.899998]
Env init done
Env init done
-0.94 max: 0.0
15:52:47.775880
step = 250: loss = 0.13269229233264923
step = 500: loss = 2.5572433471679688
step = 750: loss = 1.5515155792236328
step = 1000: loss = 0.9510169625282288
step = 1000: Average Return = 3.7000012397766113 , Max Return = [10.]
step = 1250: loss = 1.6508159637451172
step = 1500: loss = 1.726975440979004
step = 1750: loss = 1.7049555778503418
step = 2000: loss = 1.119105339050293
step = 2000: Average Return = 5.69999885559082 , Max Return = [10.]
step = 2250: loss = 1.4837725162506104
step = 2500: loss = 1.8610780239105225
step = 2750: loss = 2.2704885005950928
step = 3000: loss = 1.251682162284851
step = 3000: Average Return = 6.833334445953369 , Max Return = [10.]
step = 3250: loss = 1.0407384634017944
step = 3500: loss = 1.039856195449829
step = 3750: loss = 1.443272590637207
step = 4000: loss = 1.9327020645141602
step = 4000: Average Return = 5.799998760223389 , Max Return = [10.]
step = 4250: loss = 0.6760305762290955
step = 4500: loss = 1.3602712154388428
step = 4750: loss = 0.9655570387840271
step = 5000: loss = 1.16108238697052
step = 5000: Average Return = 6.433332443237305 , Max Return = [10.]
step = 5250: loss = 0.9161651134490967
step = 5500: loss = 1.0963587760925293
step = 5750: loss = 0.7293415069580078
step = 6000: loss = 0.9612120389938354
step = 6000: Average Return = 6.199998378753662 , Max Return = [10.]
step = 6250: loss = 2.006347894668579
step = 6500: loss = 1.271870732307434
step = 6750: loss = 1.3389642238616943
step = 7000: loss = 0.913934588432312
step = 7000: Average Return = 7.133335113525391 , Max Return = [10.]
step = 7250: loss = 2.060971260070801
step = 7500: loss = 1.0888835191726685
step = 7750: loss = 1.7452576160430908
step = 8000: loss = 0.8552207946777344
step = 8000: Average Return = 6.466665744781494 , Max Return = [10.]
step = 8250: loss = 1.772351861000061
step = 8500: loss = 1.0336304903030396
step = 8750: loss = 1.4157230854034424
step = 9000: loss = 0.5241016149520874
step = 9000: Average Return = 7.766668796539307 , Max Return = [10.]
step = 9250: loss = 1.881049394607544
step = 9500: loss = 1.263392686843872
step = 9750: loss = 1.0739028453826904
step = 10000: loss = 1.2079906463623047
step = 10000: Average Return = 6.4333319664001465 , Max Return = [10.]
18:18:03.885615
[-1.0,3.7000012,5.699999,6.8333344,5.7999988,6.4333324,6.1999984,7.133335,6.4666657,7.766669,6.43333]
Env init done
Env init done
-0.93333334 max: [3.3333333]
18:18:19.720056
step = 250: loss = 0.20429037511348724
step = 500: loss = 0.28736376762390137
step = 750: loss = 0.23765826225280762
step = 1000: loss = 0.32097506523132324
step = 1250: loss = 0.8859891295433044
step = 1500: loss = 0.6445247530937195
step = 1750: loss = 0.8082446455955505
step = 2000: loss = 0.37121111154556274
step = 2000: Average Return = 1.440000057220459 , Max Return = [10.]
step = 2250: loss = 0.0824584811925888
step = 2500: loss = 0.8423669338226318
step = 2750: loss = 0.2735166549682617
step = 3000: loss = 0.33731335401535034
step = 3250: loss = 0.2183823436498642
step = 3500: loss = 0.5766491889953613
step = 3750: loss = 0.05398193374276161
step = 4000: loss = 0.5444415807723999
step = 4000: Average Return = 1.7166677713394165 , Max Return = [10.]
step = 4250: loss = 0.5556185245513916
step = 4500: loss = 0.6980871558189392
step = 4750: loss = 0.9988212585449219
step = 5000: loss = 0.6671582460403442
step = 5250: loss = 0.7649472951889038
step = 5500: loss = 0.5337924957275391
step = 5750: loss = 0.8298157453536987
step = 6000: loss = 0.5502618551254272
step = 6000: Average Return = 1.3666669130325317 , Max Return = [10.]
step = 6250: loss = 0.7972943186759949
step = 6500: loss = 1.204728364944458
step = 6750: loss = 0.448628306388855
step = 7000: loss = 0.5473660826683044
step = 7250: loss = 0.5297995209693909
step = 7500: loss = 0.46717649698257446
step = 7750: loss = 0.5713541507720947
step = 8000: loss = 0.5213385820388794
step = 8000: Average Return = 1.766667366027832 , Max Return = [10.]
step = 8250: loss = 0.20148667693138123
step = 8500: loss = 0.7766113877296448
step = 8750: loss = 1.1434792280197144
step = 9000: loss = 1.0352274179458618
step = 9250: loss = 0.3671254813671112
step = 9500: loss = 0.5060466527938843
step = 9750: loss = 0.2071622908115387
step = 10000: loss = 0.6320611238479614
step = 10000: Average Return = 1.459999442100525 , Max Return = [6.6666665]
step = 10250: loss = 1.3074556589126587
step = 10500: loss = 0.6873796582221985
step = 10750: loss = 0.7725280523300171
step = 11000: loss = 1.7521049976348877
step = 11250: loss = 0.34143608808517456
step = 11500: loss = 0.394811749458313
step = 11750: loss = 1.3303232192993164
step = 12000: loss = 1.205603003501892
step = 12000: Average Return = 2.8566668033599854 , Max Return = [10.]
step = 12250: loss = 0.5006411671638489
step = 12500: loss = 0.8429521918296814
step = 12750: loss = 0.7890636324882507
step = 13000: loss = 1.1252052783966064
step = 13250: loss = 0.14436833560466766
step = 13500: loss = 1.2999271154403687
step = 13750: loss = 0.5532223582267761
step = 14000: loss = 0.616880476474762
step = 14000: Average Return = 2.6000008583068848 , Max Return = [10.]
step = 14250: loss = 0.7258633971214294
step = 14500: loss = 1.0502636432647705
step = 14750: loss = 1.0966925621032715
step = 15000: loss = 0.6505954265594482
step = 15250: loss = 1.2803590297698975
step = 15500: loss = 0.31356319785118103
step = 15750: loss = 1.2616504430770874
step = 16000: loss = 1.1511309146881104
step = 16000: Average Return = 1.6666667461395264 , Max Return = [10.]
step = 16250: loss = 0.4654921889305115
step = 16500: loss = 0.9716662764549255
step = 16750: loss = 0.7221649885177612
step = 17000: loss = 0.7529404759407043
step = 17250: loss = 1.236878752708435
step = 17500: loss = 1.0050129890441895
step = 17750: loss = 0.10234928876161575
step = 18000: loss = 1.074042558670044
step = 18000: Average Return = 1.6500000953674316 , Max Return = [10.]
step = 18250: loss = 0.39073795080184937
step = 18500: loss = 0.27855420112609863
step = 18750: loss = 0.9018405675888062
step = 19000: loss = 0.0979473888874054
step = 19250: loss = 1.069185733795166
step = 19500: loss = 0.2507420778274536
step = 19750: loss = 0.48329436779022217
step = 20000: loss = 0.31592893600463867
step = 20000: Average Return = 3.0499989986419678 , Max Return = [10.]
23:06:30.813783
[0.16166659,1.44,1.7166678,1.3666669,1.7666674,1.4599994,2.8566668,2.6000009,1.6666667,1.6500001,3.04999]
Env init done
Env init done
-0.87833333 max: [3.3333333]
23:06:38.380790
step = 250: loss = 0.36402902007102966
step = 500: loss = 1.704821228981018
step = 750: loss = 1.2215510606765747
step = 1000: loss = 1.3591500520706177
step = 1250: loss = 0.29267236590385437
step = 1500: loss = 0.8634306192398071
step = 1750: loss = 1.456955909729004
step = 2000: loss = 0.6574317216873169
step = 2000: Average Return = 3.266664981842041 , Max Return = [10.]
step = 2250: loss = 1.409839391708374
step = 2500: loss = 0.9837749600410461
step = 2750: loss = 0.4036138355731964
step = 3000: loss = 2.9150006771087646
step = 3250: loss = 1.4286397695541382
step = 3500: loss = 1.9597951173782349
step = 3750: loss = 0.9729163646697998
step = 4000: loss = 0.9156062602996826
step = 4000: Average Return = 4.166665554046631 , Max Return = [10.]
step = 4250: loss = 0.6780866384506226
step = 4500: loss = 0.7196153402328491
step = 4750: loss = 1.3663289546966553
step = 5000: loss = 1.6505787372589111
step = 5250: loss = 1.5129358768463135
step = 5500: loss = 1.2546136379241943
step = 5750: loss = 1.3494826555252075
step = 6000: loss = 0.9991614818572998
step = 6000: Average Return = 4.149998664855957 , Max Return = [10.]
step = 6250: loss = 1.6975147724151611
step = 6500: loss = 2.4522311687469482
step = 6750: loss = 1.4692113399505615
step = 7000: loss = 0.8174072504043579
step = 7250: loss = 0.5728964805603027
step = 7500: loss = 1.0579476356506348
step = 7750: loss = 0.35604315996170044
step = 8000: loss = 1.9559630155563354
step = 8000: Average Return = 4.500001907348633 , Max Return = [10.]
step = 8250: loss = 1.2773209810256958
step = 8500: loss = 0.7976764440536499
step = 8750: loss = 0.8844021558761597
step = 9000: loss = 1.064577341079712
step = 9250: loss = 0.5402304530143738
step = 9500: loss = 1.5444345474243164
step = 9750: loss = 0.4484555423259735
step = 10000: loss = 1.312760591506958
step = 10000: Average Return = 4.750001430511475 , Max Return = [10.]
step = 10250: loss = 1.6645638942718506
step = 10500: loss = 0.644810676574707
step = 10750: loss = 1.3506433963775635
step = 11000: loss = 0.738797664642334
step = 11250: loss = 1.6994272470474243
step = 11500: loss = 0.60289466381073
step = 11750: loss = 1.457413673400879
step = 12000: loss = 0.9924174547195435
step = 12000: Average Return = 4.766665458679199 , Max Return = [10.]
step = 12250: loss = 1.4732648134231567
step = 12500: loss = 0.6248756647109985
step = 12750: loss = 1.4413199424743652
step = 13000: loss = 1.3663407564163208
step = 13250: loss = 1.621224284172058
step = 13500: loss = 1.3468258380889893
step = 13750: loss = 0.748764157295227
step = 14000: loss = 1.0704174041748047
step = 14000: Average Return = 5.4666666984558105 , Max Return = [10.]
step = 14250: loss = 1.3587162494659424
step = 14500: loss = 0.3396352231502533
step = 14750: loss = 1.5064868927001953
step = 15000: loss = 1.1327307224273682
step = 15250: loss = 1.4573804140090942
step = 15500: loss = 0.7921531796455383
step = 15750: loss = 1.383971095085144
step = 16000: loss = 1.3433212041854858
step = 16000: Average Return = 3.849999189376831 , Max Return = [10.]
step = 16250: loss = 1.1946403980255127
step = 16500: loss = 1.4974879026412964
step = 16750: loss = 2.1958162784576416
step = 17000: loss = 1.522981882095337
step = 17250: loss = 0.7277896404266357
step = 17500: loss = 1.4846396446228027
step = 17750: loss = 0.9158337116241455
step = 18000: loss = 2.1394248008728027
step = 18000: Average Return = 4.400000095367432 , Max Return = [10.]
step = 18250: loss = 0.5425732731819153
step = 18500: loss = 0.6777477264404297
step = 18750: loss = 1.667750597000122
step = 19000: loss = 1.5582406520843506
step = 19250: loss = 0.3050576448440552
step = 19500: loss = 0.9446527361869812
step = 19750: loss = 0.5767049789428711
step = 20000: loss = 0.7294870615005493
step = 20000: Average Return = 3.666666269302368 , Max Return = [10.]
03:50:18.229299
[-0.98,3.266665,4.1666656,4.1499987,4.500002,4.7500014,4.7666655,5.4666667,3.8499992,4.4,3.666666]
Env init done
Env init done
-0.845 max: [6.6666665]
03:51:08.426621
step = 250: loss = 0.00721672922372818
step = 500: loss = 0.4496554732322693
step = 750: loss = 1.3461092710494995
step = 1000: loss = 0.7996984124183655
step = 1250: loss = 0.9401673078536987
step = 1500: loss = 1.5622949600219727
step = 1750: loss = 0.5674173831939697
step = 2000: loss = 0.9876878261566162
step = 2000: Average Return = 4.266661643981934 , Max Return = [10.]
step = 2250: loss = 2.0741419792175293
step = 2500: loss = 1.4587156772613525
step = 2750: loss = 1.255282998085022
step = 3000: loss = 1.6221791505813599
step = 3250: loss = 2.6992039680480957
step = 3500: loss = 1.2689790725708008
step = 3750: loss = 1.8243433237075806
step = 4000: loss = 2.441382646560669
step = 4000: Average Return = 6.083336353302002 , Max Return = [10.]
step = 4250: loss = 1.1368459463119507
step = 4500: loss = 0.7705471515655518
step = 4750: loss = 2.237320899963379
step = 5000: loss = 1.4811272621154785
step = 5250: loss = 2.03959321975708
step = 5500: loss = 1.0388059616088867
step = 5750: loss = 1.8503327369689941
step = 6000: loss = 0.6373528838157654
step = 6000: Average Return = 6.256669521331787 , Max Return = [10.]
step = 6250: loss = 1.9099981784820557
step = 6500: loss = 1.1512019634246826
step = 6750: loss = 0.767269492149353
step = 7000: loss = 1.0642223358154297
step = 7250: loss = 0.6686549186706543
step = 7500: loss = 0.9569675326347351
step = 7750: loss = 1.026012659072876
step = 8000: loss = 0.8713213801383972
step = 8000: Average Return = 6.199999809265137 , Max Return = [10.]
step = 8250: loss = 1.11724853515625
step = 8500: loss = 1.77738618850708
step = 8750: loss = 0.5571439862251282
step = 9000: loss = 0.5860779881477356
step = 9250: loss = 1.4035993814468384
step = 9500: loss = 1.591731071472168
step = 9750: loss = 0.5840834379196167
step = 10000: loss = 1.0351455211639404
step = 10000: Average Return = 6.249997138977051 , Max Return = [10.]
step = 10250: loss = 1.3736213445663452
step = 10500: loss = 0.4389837086200714
step = 10750: loss = 0.8155933618545532
step = 11000: loss = 0.6092913746833801
step = 11250: loss = 1.5689167976379395
step = 11500: loss = 1.0101604461669922
step = 11750: loss = 1.5725042819976807
step = 12000: loss = 0.7004793882369995
step = 12000: Average Return = 5.983333587646484 , Max Return = [10.]
step = 12250: loss = 0.5605868101119995
step = 12500: loss = 1.5038576126098633
step = 12750: loss = 1.0939576625823975
step = 13000: loss = 0.43774163722991943
step = 13250: loss = 0.22513306140899658
step = 13500: loss = 1.3363324403762817
step = 13750: loss = 1.6120256185531616
step = 14000: loss = 1.3513758182525635
step = 14000: Average Return = 5.116668224334717 , Max Return = [10.]
step = 14250: loss = 0.9040378332138062
step = 14500: loss = 0.9840198755264282
step = 14750: loss = 1.8888715505599976
step = 15000: loss = 0.7414798140525818
step = 15250: loss = 1.2282658815383911
step = 15500: loss = 0.9093819856643677
step = 15750: loss = 1.4005067348480225
step = 16000: loss = 0.6544770002365112
step = 16000: Average Return = 6.016666412353516 , Max Return = [10.]
step = 16250: loss = 1.4140242338180542
step = 16500: loss = 1.5849848985671997
step = 16750: loss = 0.9185817241668701
step = 17000: loss = 0.9095059633255005
step = 17250: loss = 1.9575047492980957
step = 17500: loss = 0.5553542375564575
step = 17750: loss = 0.6430754661560059
step = 18000: loss = 0.49921321868896484
step = 18000: Average Return = 6.649996757507324 , Max Return = [10.]
step = 18250: loss = 1.472233533859253
step = 18500: loss = 1.4349391460418701
step = 18750: loss = 1.276090383529663
step = 19000: loss = 0.7946022748947144
step = 19250: loss = 1.3341047763824463
step = 19500: loss = 1.2398366928100586
step = 19750: loss = 0.8045104742050171
step = 20000: loss = 1.0498859882354736
step = 20000: Average Return = 5.58333683013916 , Max Return = [10.]
08:35:35.004894
[-1.0,4.2666616,6.0833364,6.2566695,6.2,6.249997,5.9833336,5.116668,6.0166664,6.6499968,5.58333]
Env init done
Env init done
-0.965 max: 0.0
08:35:43.434069
step = 250: loss = 0.20714831352233887
step = 500: loss = 1.7413384914398193
step = 750: loss = 1.559330940246582
step = 1000: loss = 1.2623755931854248
step = 1250: loss = 0.8683977127075195
step = 1500: loss = 1.1255617141723633
step = 1750: loss = 1.6212852001190186
step = 2000: loss = 1.5928972959518433
step = 2000: Average Return = 5.133335590362549 , Max Return = [10.]
step = 2250: loss = 1.4697173833847046
step = 2500: loss = 1.205899715423584
step = 2750: loss = 0.9699415564537048
step = 3000: loss = 1.4207992553710938
step = 3250: loss = 1.8831958770751953
step = 3500: loss = 1.5428996086120605
step = 3750: loss = 1.3304977416992188
step = 4000: loss = 1.5857114791870117
step = 4000: Average Return = 5.350000381469727 , Max Return = [10.]
step = 4250: loss = 1.5348069667816162
step = 4500: loss = 1.4210362434387207
step = 4750: loss = 0.7811075448989868
step = 5000: loss = 0.9524795413017273
step = 5250: loss = 1.3141605854034424
step = 5500: loss = 1.9588649272918701
step = 5750: loss = 0.8790140748023987
step = 6000: loss = 1.441609501838684
step = 6000: Average Return = 6.083334445953369 , Max Return = [10.]
step = 6250: loss = 1.028193712234497
step = 6500: loss = 0.6859111785888672
step = 6750: loss = 1.0529022216796875
step = 7000: loss = 1.3916559219360352
step = 7250: loss = 1.4172296524047852
step = 7500: loss = 0.6826497316360474
step = 7750: loss = 0.7611171007156372
step = 8000: loss = 1.0974159240722656
step = 8000: Average Return = 5.966667652130127 , Max Return = [10.]
step = 8250: loss = 0.9960530996322632
step = 8500: loss = 1.8376679420471191
step = 8750: loss = 0.9722110033035278
step = 9000: loss = 1.7175354957580566
step = 9250: loss = 0.8765255212783813
step = 9500: loss = 1.1536331176757812
step = 9750: loss = 1.2323849201202393
step = 10000: loss = 0.8243929743766785
step = 10000: Average Return = 6.699998378753662 , Max Return = [10.]
step = 10250: loss = 1.1831226348876953
step = 10500: loss = 1.507006287574768
step = 10750: loss = 1.4437294006347656
step = 11000: loss = 1.7533957958221436
step = 11250: loss = 0.8490651249885559
step = 11500: loss = 1.7295305728912354
step = 11750: loss = 0.2534051537513733
step = 12000: loss = 1.1132051944732666
step = 12000: Average Return = 6.650000095367432 , Max Return = [10.]
step = 12250: loss = 0.8832569122314453
step = 12500: loss = 1.6384153366088867
step = 12750: loss = 1.4524065256118774
step = 13000: loss = 1.3958940505981445
step = 13250: loss = 0.7001972198486328
step = 13500: loss = 1.1284122467041016
step = 13750: loss = 1.879719853401184
step = 14000: loss = 0.6198452711105347
step = 14000: Average Return = 6.666662216186523 , Max Return = [10.]
step = 14250: loss = 1.3650321960449219
step = 14500: loss = 1.2165517807006836
step = 14750: loss = 1.6714704036712646
step = 15000: loss = 0.9130278825759888
step = 15250: loss = 0.9879443049430847
step = 15500: loss = 0.7368725538253784
step = 15750: loss = 1.39190673828125
step = 16000: loss = 0.9754049777984619
step = 16000: Average Return = 6.316667556762695 , Max Return = [10.]
step = 16250: loss = 1.2882659435272217
step = 16500: loss = 1.901224970817566
step = 16750: loss = 0.7742275595664978
step = 17000: loss = 1.7267693281173706
step = 17250: loss = 1.1089107990264893
step = 17500: loss = 1.236027717590332
step = 17750: loss = 0.7603294253349304
step = 18000: loss = 0.8714041709899902
step = 18000: Average Return = 6.7333292961120605 , Max Return = [10.]
step = 18250: loss = 1.008323073387146
step = 18500: loss = 2.2641849517822266
step = 18750: loss = 1.4199106693267822
step = 19000: loss = 1.5679770708084106
step = 19250: loss = 1.236276626586914
step = 19500: loss = 1.2272887229919434
step = 19750: loss = 2.2621967792510986
step = 20000: loss = 0.9320936799049377
step = 20000: Average Return = 6.316667079925537 , Max Return = [10.]
13:06:33.201362
[-0.73666674,5.1333356,5.3500004,6.0833344,5.9666677,6.6999984,6.65,6.666662,6.3166676,6.7333293,6.31666]
Env init done
Env init done
-0.87666667 max: [10.]
13:06:41.776417
step = 250: loss = 0.010169360786676407
step = 500: loss = 0.8318861722946167
step = 750: loss = 0.47494012117385864
step = 1000: loss = 0.4568924307823181
step = 1250: loss = 0.14558647572994232
step = 1500: loss = 0.2730354070663452
step = 1750: loss = 0.22296956181526184
step = 2000: loss = 0.3969948887825012
step = 2250: loss = 0.33139628171920776
step = 2500: loss = 0.3505862057209015
step = 2750: loss = 0.20526032149791718
step = 3000: loss = 0.21169200539588928
step = 3000: Average Return = 1.720001459121704 , Max Return = [10.]
step = 3250: loss = 0.26164281368255615
step = 3500: loss = 0.4881210923194885
step = 3750: loss = 0.6764389872550964
step = 4000: loss = 0.9711015224456787
step = 4250: loss = 0.08145751059055328
step = 4500: loss = 0.41699719429016113
step = 4750: loss = 0.9551596641540527
step = 5000: loss = 1.0675207376480103
step = 5250: loss = 0.3279018700122833
step = 5500: loss = 0.619437038898468
step = 5750: loss = 0.8411011695861816
step = 6000: loss = 0.1961994618177414
step = 6000: Average Return = 1.7777791023254395 , Max Return = [10.]
step = 6250: loss = 0.4852905869483948
step = 6500: loss = 1.1567976474761963
step = 6750: loss = 0.49857577681541443
step = 7000: loss = 0.1450810432434082
step = 7250: loss = 0.3685331642627716
step = 7500: loss = 0.3460591435432434
step = 7750: loss = 0.35328900814056396
step = 8000: loss = 0.49092912673950195
step = 8250: loss = 0.20936593413352966
step = 8500: loss = 0.7784585952758789
step = 8750: loss = 0.3793160021305084
step = 9000: loss = 0.4860304594039917
step = 9000: Average Return = 3.588887929916382 , Max Return = [10.]
step = 9250: loss = 1.3670756816864014
step = 9500: loss = 1.0767455101013184
step = 9750: loss = 0.9645108580589294
step = 10000: loss = 0.2873089909553528
step = 10250: loss = 0.6759707927703857
step = 10500: loss = 0.5354668498039246
step = 10750: loss = 0.41779348254203796
step = 11000: loss = 0.2858586311340332
step = 11250: loss = 1.0768986940383911
step = 11500: loss = 0.7424832582473755
step = 11750: loss = 1.1949526071548462
step = 12000: loss = 0.3858063220977783
step = 12000: Average Return = 2.168889045715332 , Max Return = [10.]
step = 12250: loss = 0.4831686019897461
step = 12500: loss = 0.5799262523651123
step = 12750: loss = 0.17946290969848633
step = 13000: loss = 0.7740209102630615
step = 13250: loss = 1.405666708946228
step = 13500: loss = 0.17486855387687683
step = 13750: loss = 0.7607550621032715
step = 14000: loss = 0.5495584607124329
step = 14250: loss = 0.9294725656509399
step = 14500: loss = 0.32320863008499146
step = 14750: loss = 0.7035871744155884
step = 15000: loss = 0.3649514615535736
step = 15000: Average Return = 1.5822231769561768 , Max Return = [6.6666665]
step = 15250: loss = 0.3206990957260132
step = 15500: loss = 0.6576478481292725
step = 15750: loss = 0.9873444437980652
step = 16000: loss = 0.6315022110939026
step = 16250: loss = 0.3711160719394684
step = 16500: loss = 0.9593236446380615
step = 16750: loss = 0.6875506639480591
step = 17000: loss = 1.281496286392212
step = 17250: loss = 0.3962264060974121
step = 17500: loss = 0.3227255344390869
step = 17750: loss = 0.44826042652130127
step = 18000: loss = 0.5341120362281799
step = 18000: Average Return = 2.057778835296631 , Max Return = [10.]
step = 18250: loss = 0.6059044599533081
step = 18500: loss = 0.2591707408428192
step = 18750: loss = 0.18248416483402252
step = 19000: loss = 0.4213995039463043
step = 19250: loss = 1.4620754718780518
step = 19500: loss = 0.7283973097801208
step = 19750: loss = 0.957650899887085
step = 20000: loss = 0.27080023288726807
step = 20250: loss = 1.2796244621276855
step = 20500: loss = 0.2258872389793396
step = 20750: loss = 1.34931218624115
step = 21000: loss = 0.5432032942771912
step = 21000: Average Return = 1.8000012636184692 , Max Return = [10.]
step = 21250: loss = 0.8148845434188843
step = 21500: loss = 0.26045554876327515
step = 21750: loss = 1.0229554176330566
step = 22000: loss = 0.5284585952758789
step = 22250: loss = 0.25926274061203003
step = 22500: loss = 0.5976047515869141
step = 22750: loss = 0.08766539394855499
step = 23000: loss = 0.2767016887664795
step = 23250: loss = 0.4092790484428406
step = 23500: loss = 1.035478115081787
step = 23750: loss = 0.9204162359237671
step = 24000: loss = 0.8216138482093811
step = 24000: Average Return = 1.3311116695404053 , Max Return = [10.]
step = 24250: loss = 0.4908120632171631
step = 24500: loss = 1.1161444187164307
step = 24750: loss = 0.6020159721374512
step = 25000: loss = 0.9446805715560913
step = 25250: loss = 0.19426682591438293
step = 25500: loss = 0.3037036657333374
step = 25750: loss = 0.8445931673049927
step = 26000: loss = 0.981245219707489
step = 26250: loss = 0.5102856159210205
step = 26500: loss = 0.54598069190979
step = 26750: loss = 0.37268784642219543
step = 27000: loss = 0.33090609312057495
step = 27000: Average Return = 1.591111660003662 , Max Return = [10.]
step = 27250: loss = 0.36472731828689575
step = 27500: loss = 0.36612650752067566
step = 27750: loss = 0.2794521152973175
step = 28000: loss = 1.2613322734832764
step = 28250: loss = 0.9600263833999634
step = 28500: loss = 0.47490882873535156
step = 28750: loss = 0.7264387607574463
step = 29000: loss = 0.40079817175865173
step = 29250: loss = 1.6318327188491821
step = 29500: loss = 0.7973394989967346
step = 29750: loss = 0.28851503133773804
step = 30000: loss = 0.8285247087478638
step = 30000: Average Return = 1.0155556201934814 , Max Return = [10.]
19:25:18.839313
[-1.0,1.7200015,1.7777791,3.588888,2.168889,1.5822232,2.0577788,1.8000013,1.3311117,1.5911117,1.015555]
Env init done
Env init done
-0.84000003 max: [10.]
19:25:27.562663
step = 250: loss = 0.04665771499276161
step = 500: loss = 0.645979642868042
step = 750: loss = 1.2678475379943848
step = 1000: loss = 0.7153573036193848
step = 1250: loss = 0.7988787889480591
step = 1500: loss = 0.6242499351501465
step = 1750: loss = 1.2152457237243652
step = 2000: loss = 3.1933224201202393
step = 2250: loss = 1.4814178943634033
step = 2500: loss = 0.8569775223731995
step = 2750: loss = 0.6123332977294922
step = 3000: loss = 0.6649959683418274
step = 3000: Average Return = 4.0333333015441895 , Max Return = [10.]
step = 3250: loss = 2.263606548309326
step = 3500: loss = 1.4902572631835938
step = 3750: loss = 0.779630184173584
step = 4000: loss = 0.7215322852134705
step = 4250: loss = 0.5273748636245728
step = 4500: loss = 1.3663911819458008
step = 4750: loss = 1.465315580368042
step = 5000: loss = 0.7715011835098267
step = 5250: loss = 0.3916049003601074
step = 5500: loss = 0.5920247435569763
step = 5750: loss = 0.8855323791503906
step = 6000: loss = 0.8167449235916138
step = 6000: Average Return = 4.87777853012085 , Max Return = [10.]
step = 6250: loss = 0.7589813470840454
step = 6500: loss = 0.349770188331604
step = 6750: loss = 0.6427603960037231
step = 7000: loss = 0.7564130425453186
step = 7250: loss = 0.716625988483429
step = 7500: loss = 1.2789649963378906
step = 7750: loss = 1.8169081211090088
step = 8000: loss = 1.814432144165039
step = 8250: loss = 1.2697278261184692
step = 8500: loss = 1.0671876668930054
step = 8750: loss = 1.387587308883667
step = 9000: loss = 0.45794057846069336
step = 9000: Average Return = 5.222219944000244 , Max Return = [10.]
step = 9250: loss = 0.34663379192352295
step = 9500: loss = 1.8781260251998901
step = 9750: loss = 0.9657644033432007
step = 10000: loss = 1.6018822193145752
step = 10250: loss = 0.9079001545906067
step = 10500: loss = 0.9430903196334839
step = 10750: loss = 0.9381492137908936
step = 11000: loss = 0.8938491344451904
step = 11250: loss = 1.0995402336120605
step = 11500: loss = 0.46888938546180725
step = 11750: loss = 1.4672118425369263
step = 12000: loss = 0.7612961530685425
step = 12000: Average Return = 4.8222222328186035 , Max Return = [10.]
step = 12250: loss = 1.1552449464797974
step = 12500: loss = 1.7839875221252441
step = 12750: loss = 0.5941104292869568
step = 13000: loss = 2.4149680137634277
step = 13250: loss = 0.32950663566589355
step = 13500: loss = 0.9163745641708374
step = 13750: loss = 0.896569550037384
step = 14000: loss = 0.6523663401603699
step = 14250: loss = 0.9005833864212036
step = 14500: loss = 1.346333622932434
step = 14750: loss = 0.3317261338233948
step = 15000: loss = 0.6268154382705688
step = 15000: Average Return = 5.944437026977539 , Max Return = [10.]
step = 15250: loss = 1.0081312656402588
step = 15500: loss = 1.6871057748794556
step = 15750: loss = 1.473912000656128
step = 16000: loss = 0.7961505055427551
step = 16250: loss = 0.2755456268787384
step = 16500: loss = 1.139377474784851
step = 16750: loss = 0.9299867153167725
step = 17000: loss = 0.5557854175567627
step = 17250: loss = 1.569650650024414
step = 17500: loss = 1.4194939136505127
step = 17750: loss = 1.0171570777893066
step = 18000: loss = 2.4368486404418945
step = 18000: Average Return = 5.677775859832764 , Max Return = [10.]
step = 18250: loss = 1.367164969444275
step = 18500: loss = 0.9799065589904785
step = 18750: loss = 1.2374852895736694
step = 19000: loss = 1.0978357791900635
step = 19250: loss = 0.27448979020118713
step = 19500: loss = 0.7859684228897095
step = 19750: loss = 0.25658681988716125
step = 20000: loss = 0.953184187412262
step = 20250: loss = 0.7949265837669373
step = 20500: loss = 0.8464541435241699
step = 20750: loss = 1.254571795463562
step = 21000: loss = 0.5224780440330505
step = 21000: Average Return = 4.744438648223877 , Max Return = [10.]
step = 21250: loss = 1.3753888607025146
step = 21500: loss = 1.6946568489074707
step = 21750: loss = 2.5006165504455566
step = 22000: loss = 0.9760310649871826
step = 22250: loss = 1.1448112726211548
step = 22500: loss = 2.209486722946167
step = 22750: loss = 1.5056450366973877
step = 23000: loss = 1.7630295753479004
step = 23250: loss = 0.6484242677688599
step = 23500: loss = 1.886527419090271
step = 23750: loss = 0.2801162302494049
step = 24000: loss = 0.9100450277328491
step = 24000: Average Return = 5.599998950958252 , Max Return = [10.]
step = 24250: loss = 0.7533604502677917
step = 24500: loss = 1.7969183921813965
step = 24750: loss = 0.9942283034324646
step = 25000: loss = 1.8606394529342651
step = 25250: loss = 1.9227333068847656
step = 25500: loss = 0.7966181635856628
step = 25750: loss = 0.4961821734905243
step = 26000: loss = 0.4958696961402893
step = 26250: loss = 0.608927845954895
step = 26500: loss = 1.4005892276763916
step = 26750: loss = 1.1677830219268799
step = 27000: loss = 1.2220964431762695
step = 27000: Average Return = 6.3444366455078125 , Max Return = [10.]
step = 27250: loss = 1.1161856651306152
step = 27500: loss = 0.624143123626709
step = 27750: loss = 1.0659735202789307
step = 28000: loss = 1.1037710905075073
step = 28250: loss = 0.9592800140380859
step = 28500: loss = 0.8587343096733093
step = 28750: loss = 0.41930723190307617
step = 29000: loss = 0.41283875703811646
step = 29250: loss = 0.931176483631134
step = 29500: loss = 0.21799398958683014
step = 29750: loss = 1.0131704807281494
step = 30000: loss = 1.3881959915161133
step = 30000: Average Return = 5.226665496826172 , Max Return = [10.]
01:52:06.075880
[-0.66222227,4.0333333,4.8777785,5.22222,4.822222,5.944437,5.677776,4.7444386,5.599999,6.3444366,5.226665]
Env init done
Env init done
-0.78888893 max: [6.6666665]
01:52:14.373673
step = 250: loss = 0.06107410043478012
step = 500: loss = 0.26998770236968994
step = 750: loss = 0.6238675713539124
step = 1000: loss = 0.9623085856437683
step = 1250: loss = 0.1584950089454651
step = 1500: loss = 1.47488534450531
step = 1750: loss = 1.800471544265747
step = 2000: loss = 1.6857409477233887
step = 2250: loss = 1.7264583110809326
step = 2500: loss = 0.40899568796157837
step = 2750: loss = 2.4414310455322266
step = 3000: loss = 1.3556292057037354
step = 3000: Average Return = 4.011109828948975 , Max Return = [10.]
step = 3250: loss = 1.9963431358337402
step = 3500: loss = 0.8548169136047363
step = 3750: loss = 1.5331777334213257
step = 4000: loss = 0.5274689197540283
step = 4250: loss = 0.6111419796943665
step = 4500: loss = 0.9533331990242004
step = 4750: loss = 2.042795181274414
step = 5000: loss = 0.7483835220336914
step = 5250: loss = 1.1473592519760132
step = 5500: loss = 0.539319634437561
step = 5750: loss = 0.927514910697937
step = 6000: loss = 1.6048210859298706
step = 6000: Average Return = 5.199997901916504 , Max Return = [10.]
step = 6250: loss = 0.8531596064567566
step = 6500: loss = 0.5827792882919312
step = 6750: loss = 0.3347373902797699
step = 7000: loss = 1.8470075130462646
step = 7250: loss = 0.964622437953949
step = 7500: loss = 1.0056962966918945
step = 7750: loss = 1.5391695499420166
step = 8000: loss = 1.6845099925994873
step = 8250: loss = 0.6348296403884888
step = 8500: loss = 1.2752798795700073
step = 8750: loss = 1.098633050918579
step = 9000: loss = 2.712280750274658
step = 9000: Average Return = 6.3999924659729 , Max Return = [10.]
step = 9250: loss = 0.6098355650901794
step = 9500: loss = 1.9026520252227783
step = 9750: loss = 1.9302232265472412
step = 10000: loss = 1.6451148986816406
step = 10250: loss = 0.7750967741012573
step = 10500: loss = 1.771236777305603
step = 10750: loss = 2.36197829246521
step = 11000: loss = 0.8843347430229187
step = 11250: loss = 1.2854268550872803
step = 11500: loss = 1.0783226490020752
step = 11750: loss = 1.6328045129776
step = 12000: loss = 0.7483181953430176
step = 12000: Average Return = 5.611107349395752 , Max Return = [10.]
step = 12250: loss = 1.267242670059204
step = 12500: loss = 0.6090630888938904
step = 12750: loss = 1.687625527381897
step = 13000: loss = 2.48938250541687
step = 13250: loss = 1.0369716882705688
step = 13500: loss = 1.9689953327178955
step = 13750: loss = 0.7834876179695129
step = 14000: loss = 0.8187060356140137
step = 14250: loss = 1.2605433464050293
step = 14500: loss = 1.3323040008544922
step = 14750: loss = 0.6224596500396729
step = 15000: loss = 1.4275662899017334
step = 15000: Average Return = 5.244438648223877 , Max Return = [10.]
step = 15250: loss = 1.220001220703125
step = 15500: loss = 1.0441076755523682
step = 15750: loss = 1.489040732383728
step = 16000: loss = 1.081411600112915
step = 16250: loss = 1.5445163249969482
step = 16500: loss = 1.6093385219573975
step = 16750: loss = 1.3447550535202026
step = 17000: loss = 1.816206932067871
step = 17250: loss = 1.8592867851257324
step = 17500: loss = 0.5951974391937256