|
232 | 232 | },
|
233 | 233 | {
|
234 | 234 | "cell_type": "code",
|
235 |
| - "execution_count": 5, |
| 235 | + "execution_count": 62, |
236 | 236 | "metadata": {
|
237 | 237 | "collapsed": false
|
238 | 238 | },
|
|
261 | 261 | " self.trace_size = trace_size\n",
|
262 | 262 | " self.last_board_state = None\n",
|
263 | 263 | " self.last_action = None\n",
|
264 |
| - "\n", |
265 |
| - "# def reset(self):\n", |
266 |
| - "# self.last_board_state = None\n", |
267 |
| - "# self.last_action = None\n", |
268 | 264 | " \n",
|
269 | 265 | " def softmax(self, next_board_state):\n",
|
270 | 266 | " \"\"\"\n",
|
|
292 | 288 | },
|
293 | 289 | {
|
294 | 290 | "cell_type": "code",
|
295 |
| - "execution_count": 443, |
| 291 | + "execution_count": 63, |
296 | 292 | "metadata": {
|
297 | 293 | "collapsed": false
|
298 | 294 | },
|
|
356 | 352 | },
|
357 | 353 | {
|
358 | 354 | "cell_type": "code",
|
359 |
| - "execution_count": 6, |
| 355 | + "execution_count": 64, |
360 | 356 | "metadata": {
|
361 | 357 | "collapsed": false
|
362 | 358 | },
|
|
466 | 462 | },
|
467 | 463 | {
|
468 | 464 | "cell_type": "code",
|
469 |
| - "execution_count": 7, |
| 465 | + "execution_count": 65, |
470 | 466 | "metadata": {
|
471 | 467 | "collapsed": false
|
472 | 468 | },
|
|
478 | 474 | },
|
479 | 475 | {
|
480 | 476 | "cell_type": "code",
|
481 |
| - "execution_count": 8, |
| 477 | + "execution_count": 66, |
482 | 478 | "metadata": {
|
483 | 479 | "collapsed": false
|
484 | 480 | },
|
485 | 481 | "outputs": [
|
486 | 482 | {
|
487 |
| - "ename": "NameError", |
488 |
| - "evalue": "name 'run_trial' is not defined", |
489 |
| - "output_type": "error", |
490 |
| - "traceback": [ |
491 |
| - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
492 |
| - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", |
493 |
| - "\u001b[0;32m<ipython-input-8-d1652b569c04>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mrun_trial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0magent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
494 |
| - "\u001b[0;31mNameError\u001b[0m: name 'run_trial' is not defined" |
| 483 | + "name": "stdout", |
| 484 | + "output_type": "stream", |
| 485 | + "text": [ |
| 486 | + "Won!\n", |
| 487 | + "Won!\n" |
495 | 488 | ]
|
| 489 | + }, |
| 490 | + { |
| 491 | + "data": { |
| 492 | + "text/plain": [ |
| 493 | + "([0, 0, 0, 50, 0, 0, 50, 0, 0, 50],\n", |
| 494 | + " [50],\n", |
| 495 | + " [0, 0, 0, 50, 50, 50, 100, 100, 100, 150],\n", |
| 496 | + " [50])" |
| 497 | + ] |
| 498 | + }, |
| 499 | + "execution_count": 66, |
| 500 | + "metadata": {}, |
| 501 | + "output_type": "execute_result" |
496 | 502 | }
|
497 | 503 | ],
|
498 | 504 | "source": [
|
|
501 | 507 | },
|
502 | 508 | {
|
503 | 509 | "cell_type": "code",
|
504 |
| - "execution_count": 9, |
| 510 | + "execution_count": 67, |
505 | 511 | "metadata": {
|
506 | 512 | "collapsed": false
|
507 | 513 | },
|
|
590 | 596 | },
|
591 | 597 | {
|
592 | 598 | "cell_type": "code",
|
593 |
| - "execution_count": 10, |
| 599 | + "execution_count": 68, |
594 | 600 | "metadata": {
|
595 | 601 | "collapsed": true
|
596 | 602 | },
|
|
612 | 618 | },
|
613 | 619 | {
|
614 | 620 | "cell_type": "code",
|
615 |
| - "execution_count": 11, |
| 621 | + "execution_count": 69, |
616 | 622 | "metadata": {
|
617 | 623 | "collapsed": true
|
618 | 624 | },
|
|
684 | 690 | },
|
685 | 691 | {
|
686 | 692 | "cell_type": "code",
|
687 |
| - "execution_count": 56, |
| 693 | + "execution_count": 70, |
688 | 694 | "metadata": {
|
689 | 695 | "collapsed": false
|
690 | 696 | },
|
|
709 | 715 | " \n",
|
710 | 716 | " p1 = Q_Learner(x, None, None, player=1)\n",
|
711 | 717 | " p2 = Random_Learner(x)\n",
|
712 |
| - " play_game_no_output(x, p1, p2, True)\n", |
| 718 | + " play_game_no_output(x, p1, p2)\n", |
713 | 719 | " \n",
|
714 | 720 | " for game in xrange(1, num_trials):\n",
|
715 | 721 | " \n",
|
716 | 722 | " x = ConnectN(grid_size, N)\n",
|
717 | 723 | " p1 = Q_Learner(x, p1.value_table, None, player=1)\n",
|
718 | 724 | " p2 = Random_Learner(x)\n",
|
719 |
| - " play_game_no_output(x, p1, p2, True)\n", |
| 725 | + " play_game_no_output(x, p1, p2)\n", |
720 | 726 | "\n",
|
721 | 727 | " if game == num_trials - 1:\n",
|
722 | 728 | " return p1.value_table\n"
|
723 | 729 | ]
|
724 | 730 | },
|
725 | 731 | {
|
726 | 732 | "cell_type": "code",
|
727 |
| - "execution_count": 60, |
| 733 | + "execution_count": 71, |
728 | 734 | "metadata": {
|
729 | 735 | "collapsed": false
|
730 | 736 | },
|
|
735 | 741 | "2"
|
736 | 742 | ]
|
737 | 743 | },
|
738 |
| - "execution_count": 60, |
| 744 | + "execution_count": 71, |
739 | 745 | "metadata": {},
|
740 | 746 | "output_type": "execute_result"
|
741 | 747 | }
|
|
753 | 759 | },
|
754 | 760 | {
|
755 | 761 | "cell_type": "code",
|
756 |
| - "execution_count": 61, |
| 762 | + "execution_count": 72, |
757 | 763 | "metadata": {
|
758 | 764 | "collapsed": false
|
759 | 765 | },
|
|
764 | 770 | "{'0000000000000000000000000000000000001-100001-1-10000': array([ 0., 0., 15., 0., 0., 0., 0.])}"
|
765 | 771 | ]
|
766 | 772 | },
|
767 |
| - "execution_count": 61, |
| 773 | + "execution_count": 72, |
768 | 774 | "metadata": {},
|
769 | 775 | "output_type": "execute_result"
|
770 | 776 | }
|
|
775 | 781 | },
|
776 | 782 | {
|
777 | 783 | "cell_type": "code",
|
778 |
| - "execution_count": 59, |
| 784 | + "execution_count": 73, |
779 | 785 | "metadata": {
|
780 | 786 | "collapsed": false,
|
781 | 787 | "scrolled": true
|
|
784 | 790 | {
|
785 | 791 | "data": {
|
786 | 792 | "text/plain": [
|
787 |
| - "{'000000000000000000000-100000010000001-1000001-1001-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
788 |
| - " '000000000000000000000000000000000000-1000001-100100': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
789 |
| - " '0000000000000000000000000000000000000000000-100100': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
790 |
| - " '0000000000000000000000000000000000000000000000000': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
791 |
| - " '000000000000000000000000000000000000000000000100-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
792 |
| - " '0000000000000000000000000000000000000000001-100100': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
793 |
| - " '0000000000000000000000000000000000000000001001-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
794 |
| - " '000000000000000000000000000000000000000000100100-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
795 |
| - " '000000000000000000000000000000000001-1000001-1001-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
796 |
| - " '000000000000000000000000000000000001-1000001-100100': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
797 |
| - " '00000000000000000000000000000000000100000010-11-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
798 |
| - " '0000000000000000000000000000000000010000001001-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
799 |
| - " '000000000000000000000000000010000001-1000001-1001-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
800 |
| - " '000000000000000000000000000010000001000-10010-11-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
801 |
| - " '00000000000000000000000000001000000100000010-11-10-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
802 |
| - " '000000000000000000000100000010000001000-10010-11-10-1': array([ 25., 0., 0., 0., 0., 0., 0.]),\n", |
803 |
| - " '000000000000001000000-100000010000001-1000001-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
804 |
| - " '000000000000001000000-100000010000001-1000001-1001-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
805 |
| - " '000000010000001000000-10000001-1000001-1000001-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
806 |
| - " '000000010000001000000-100000010000001-1000001-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
807 |
| - " '100000010000001000-100-100010-11-110-1-1-11-1101-111-1-111-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
808 |
| - " '100000010000001000000-100000-11-1000-1-11-1101-111-1-111-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
809 |
| - " '100000010000001000000-100000-11-110-1-1-11-1101-111-1-111-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
810 |
| - " '100000010000001000000-10000001-1000-1-11-1001-111-1-111-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
811 |
| - " '100000010000001000000-10000001-1000-101-1001-111-1-101-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
812 |
| - " '100000010000001000000-10000001-1000001-1000-101-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
813 |
| - " '100000010000001000000-10000001-1000001-1000001-1001-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
814 |
| - " '100000010000001000000-10000001-1000001-1001-101-1-101-1-1': array([ 0., 0., 0., 0., 0., 0., 0.])}" |
| 793 | + "{'0000000000000000000000000000000000000000000000000': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 794 | + " '00000000000000000000000000000000000000000000100-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 795 | + " '00000000000000000000000000000000000000000010-10000': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 796 | + " '00000000000000000000000000000000000000000010100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 797 | + " '00000000000000000000000000000000000000000010100-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 798 | + " '0000000000000000000000000000000000010-1000010100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 799 | + " '0000000000000000000000000000000000010000-1010-110-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 800 | + " '00000000000000000000000000000000000100000010-100-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 801 | + " '00000000000000000000000000000000000100000010-10000': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 802 | + " '00000000000000000000000000000000000100000010-110-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 803 | + " '00000000000000000000000000000000000100000010100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 804 | + " '0000000000000000000000000000100000010-100001-1100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 805 | + " '0000000000000000000000000000100000010-1000010100-1-1': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 806 | + " '0000000000000000000000000000100000010000-1010-11-1-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 807 | + " '0000000000000000000000000000100000010000-1010-110-10': array([ 0., 0., 0., 0., 0., 0., 0.]),\n", |
| 808 | + " '0000000000000000000001000000100000010-100001-1100-1-1': array([ 25., 0., 0., 0., 0., 0., 0.]),\n", |
| 809 | + " '0000000000000000000001000000100000010000-1010-11-1-10': array([ 25., 0., 0., 0., 0., 0., 0.])}" |
815 | 810 | ]
|
816 | 811 | },
|
817 |
| - "execution_count": 59, |
| 812 | + "execution_count": 73, |
818 | 813 | "metadata": {},
|
819 | 814 | "output_type": "execute_result"
|
820 | 815 | }
|
|
0 commit comments