Skip to content

Commit 4d18da5

Browse files
committed
Q Learning bug
1 parent deb8516 commit 4d18da5

8 files changed

+576
-148
lines changed

.ipynb_checkpoints/Andre-RunPlots-checkpoint.ipynb

+244-14
Large diffs are not rendered by default.

.ipynb_checkpoints/QLearning-checkpoint.ipynb

+52-57
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@
232232
},
233233
{
234234
"cell_type": "code",
235-
"execution_count": 5,
235+
"execution_count": 62,
236236
"metadata": {
237237
"collapsed": false
238238
},
@@ -261,10 +261,6 @@
261261
" self.trace_size = trace_size\n",
262262
" self.last_board_state = None\n",
263263
" self.last_action = None\n",
264-
"\n",
265-
"# def reset(self):\n",
266-
"# self.last_board_state = None\n",
267-
"# self.last_action = None\n",
268264
" \n",
269265
" def softmax(self, next_board_state):\n",
270266
" \"\"\"\n",
@@ -292,7 +288,7 @@
292288
},
293289
{
294290
"cell_type": "code",
295-
"execution_count": 443,
291+
"execution_count": 63,
296292
"metadata": {
297293
"collapsed": false
298294
},
@@ -356,7 +352,7 @@
356352
},
357353
{
358354
"cell_type": "code",
359-
"execution_count": 6,
355+
"execution_count": 64,
360356
"metadata": {
361357
"collapsed": false
362358
},
@@ -466,7 +462,7 @@
466462
},
467463
{
468464
"cell_type": "code",
469-
"execution_count": 7,
465+
"execution_count": 65,
470466
"metadata": {
471467
"collapsed": false
472468
},
@@ -478,21 +474,31 @@
478474
},
479475
{
480476
"cell_type": "code",
481-
"execution_count": 8,
477+
"execution_count": 66,
482478
"metadata": {
483479
"collapsed": false
484480
},
485481
"outputs": [
486482
{
487-
"ename": "NameError",
488-
"evalue": "name 'run_trial' is not defined",
489-
"output_type": "error",
490-
"traceback": [
491-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
492-
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
493-
"\u001b[0;32m<ipython-input-8-d1652b569c04>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mrun_trial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0magent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
494-
"\u001b[0;31mNameError\u001b[0m: name 'run_trial' is not defined"
483+
"name": "stdout",
484+
"output_type": "stream",
485+
"text": [
486+
"Won!\n",
487+
"Won!\n"
495488
]
489+
},
490+
{
491+
"data": {
492+
"text/plain": [
493+
"([0, 0, 0, 50, 0, 0, 50, 0, 0, 50],\n",
494+
" [50],\n",
495+
" [0, 0, 0, 50, 50, 50, 100, 100, 100, 150],\n",
496+
" [50])"
497+
]
498+
},
499+
"execution_count": 66,
500+
"metadata": {},
501+
"output_type": "execute_result"
496502
}
497503
],
498504
"source": [
@@ -501,7 +507,7 @@
501507
},
502508
{
503509
"cell_type": "code",
504-
"execution_count": 9,
510+
"execution_count": 67,
505511
"metadata": {
506512
"collapsed": false
507513
},
@@ -590,7 +596,7 @@
590596
},
591597
{
592598
"cell_type": "code",
593-
"execution_count": 10,
599+
"execution_count": 68,
594600
"metadata": {
595601
"collapsed": true
596602
},
@@ -612,7 +618,7 @@
612618
},
613619
{
614620
"cell_type": "code",
615-
"execution_count": 11,
621+
"execution_count": 69,
616622
"metadata": {
617623
"collapsed": true
618624
},
@@ -684,7 +690,7 @@
684690
},
685691
{
686692
"cell_type": "code",
687-
"execution_count": 56,
693+
"execution_count": 70,
688694
"metadata": {
689695
"collapsed": false
690696
},
@@ -709,22 +715,22 @@
709715
" \n",
710716
" p1 = Q_Learner(x, None, None, player=1)\n",
711717
" p2 = Random_Learner(x)\n",
712-
" play_game_no_output(x, p1, p2, True)\n",
718+
" play_game_no_output(x, p1, p2)\n",
713719
" \n",
714720
" for game in xrange(1, num_trials):\n",
715721
" \n",
716722
" x = ConnectN(grid_size, N)\n",
717723
" p1 = Q_Learner(x, p1.value_table, None, player=1)\n",
718724
" p2 = Random_Learner(x)\n",
719-
" play_game_no_output(x, p1, p2, True)\n",
725+
" play_game_no_output(x, p1, p2)\n",
720726
"\n",
721727
" if game == num_trials - 1:\n",
722728
" return p1.value_table\n"
723729
]
724730
},
725731
{
726732
"cell_type": "code",
727-
"execution_count": 60,
733+
"execution_count": 71,
728734
"metadata": {
729735
"collapsed": false
730736
},
@@ -735,7 +741,7 @@
735741
"2"
736742
]
737743
},
738-
"execution_count": 60,
744+
"execution_count": 71,
739745
"metadata": {},
740746
"output_type": "execute_result"
741747
}
@@ -753,7 +759,7 @@
753759
},
754760
{
755761
"cell_type": "code",
756-
"execution_count": 61,
762+
"execution_count": 72,
757763
"metadata": {
758764
"collapsed": false
759765
},
@@ -764,7 +770,7 @@
764770
"{'0000000000000000000000000000000000001-100001-1-10000': array([ 0., 0., 15., 0., 0., 0., 0.])}"
765771
]
766772
},
767-
"execution_count": 61,
773+
"execution_count": 72,
768774
"metadata": {},
769775
"output_type": "execute_result"
770776
}
@@ -775,7 +781,7 @@
775781
},
776782
{
777783
"cell_type": "code",
778-
"execution_count": 59,
784+
"execution_count": 73,
779785
"metadata": {
780786
"collapsed": false,
781787
"scrolled": true
@@ -784,37 +790,26 @@
784790
{
785791
"data": {
786792
"text/plain": [
787-
"{'000000000000000000000-100000010000001-1000001-1001-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
788-
" '000000000000000000000000000000000000-1000001-100100': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
789-
" '0000000000000000000000000000000000000000000-100100': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
790-
" '0000000000000000000000000000000000000000000000000': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
791-
" '000000000000000000000000000000000000000000000100-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
792-
" '0000000000000000000000000000000000000000001-100100': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
793-
" '0000000000000000000000000000000000000000001001-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
794-
" '000000000000000000000000000000000000000000100100-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
795-
" '000000000000000000000000000000000001-1000001-1001-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
796-
" '000000000000000000000000000000000001-1000001-100100': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
797-
" '00000000000000000000000000000000000100000010-11-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
798-
" '0000000000000000000000000000000000010000001001-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
799-
" '000000000000000000000000000010000001-1000001-1001-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
800-
" '000000000000000000000000000010000001000-10010-11-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
801-
" '00000000000000000000000000001000000100000010-11-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
802-
" '000000000000000000000100000010000001000-10010-11-10-1': array([ 25., 0., 0., 0., 0., 0., 0.]),\n",
803-
" '000000000000001000000-100000010000001-1000001-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
804-
" '000000000000001000000-100000010000001-1000001-1001-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
805-
" '000000010000001000000-10000001-1000001-1000001-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
806-
" '000000010000001000000-100000010000001-1000001-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
807-
" '100000010000001000-100-100010-11-110-1-1-11-1101-111-1-111-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
808-
" '100000010000001000000-100000-11-1000-1-11-1101-111-1-111-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
809-
" '100000010000001000000-100000-11-110-1-1-11-1101-111-1-111-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
810-
" '100000010000001000000-10000001-1000-1-11-1001-111-1-111-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
811-
" '100000010000001000000-10000001-1000-101-1001-111-1-101-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
812-
" '100000010000001000000-10000001-1000001-1000-101-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
813-
" '100000010000001000000-10000001-1000001-1000001-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
814-
" '100000010000001000000-10000001-1000001-1001-101-1-101-1-1': array([ 0., 0., 0., 0., 0., 0., 0.])}"
793+
"{'0000000000000000000000000000000000000000000000000': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
794+
" '00000000000000000000000000000000000000000000100-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
795+
" '00000000000000000000000000000000000000000010-10000': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
796+
" '00000000000000000000000000000000000000000010100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
797+
" '00000000000000000000000000000000000000000010100-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
798+
" '0000000000000000000000000000000000010-1000010100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
799+
" '0000000000000000000000000000000000010000-1010-110-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
800+
" '00000000000000000000000000000000000100000010-100-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
801+
" '00000000000000000000000000000000000100000010-10000': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
802+
" '00000000000000000000000000000000000100000010-110-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
803+
" '00000000000000000000000000000000000100000010100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
804+
" '0000000000000000000000000000100000010-100001-1100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
805+
" '0000000000000000000000000000100000010-1000010100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
806+
" '0000000000000000000000000000100000010000-1010-11-1-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
807+
" '0000000000000000000000000000100000010000-1010-110-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n",
808+
" '0000000000000000000001000000100000010-100001-1100-1-1': array([ 25., 0., 0., 0., 0., 0., 0.]),\n",
809+
" '0000000000000000000001000000100000010000-1010-11-1-10': array([ 25., 0., 0., 0., 0., 0., 0.])}"
815810
]
816811
},
817-
"execution_count": 59,
812+
"execution_count": 73,
818813
"metadata": {},
819814
"output_type": "execute_result"
820815
}

0 commit comments

Comments
 (0)