-
Notifications
You must be signed in to change notification settings - Fork 0
/
ai.rb
110 lines (86 loc) · 2.66 KB
/
ai.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Symbol naming the tic-tac-toe world; Ai#initialize lists it in @playable_worlds.
TicTacToeLabel = :tic_tac_toe
# Table-driven player that learns a value for every state it encounters.
#
# The `world` collaborator is duck-typed; this class calls `reset`,
# `clone_state`, `step(move:)`, `ended?`, `turn_num`, `to_s`,
# `won?(side:, tmp_state:)`, `opposite_side(side:)`,
# `parse_human_move(move_id:, symbol:)` and `next_possible_moves_states`
# (an enumerable of hashes with `:move` and `:state` keys) on it.
#
# `values` maps a state to its current estimate: 1 for a state this player has
# won, 0 for a lost or otherwise finished state, 0.5 for anything not yet seen.
class Ai
  # File the value table is written to when `start` is called without a path.
  DEFAULT_DUMP_PATH = 'Ai_values.dump'.freeze

  attr_reader :world, :step_size, :my_side
  attr_accessor :values

  # Loads a value table previously written by #start.
  #
  # NOTE(review): Marshal.load can instantiate arbitrary objects, so only feed
  # it dump files that come from a trusted source.
  #
  # @param path [String] location of the Marshal dump
  # @return [Object] whatever object the dump contains (a Hash when written by #start)
  def self.read(path:)
    File.open(path, 'rb') { |file| Marshal.load(file) }
  end

  # @param world [Object] the game world to play in (see the class comment for the
  #   methods it must respond to)
  # @param learned_values_path [String, nil] optional dump of previously learned
  #   values; when nil the player starts with an empty table
  def initialize(world:, learned_values_path: nil)
    @playable_worlds = [TicTacToeLabel]
    @world = world
    @step_size = 0.1
    @my_side = 'o'
    self.values = learned_values_path ? self.class.read(path: learned_values_path) : {}
    # A nil key is never a real state; drop it if a loaded dump carried one.
    @values.delete_if { |state, _value| state.nil? }
  end

  # Plays one game against a human on standard input, updating `values` as the
  # game progresses, then writes the table to `dump_path` and prints the final
  # world.
  #
  # Each round: this player moves greedily, the human's move is read from
  # `$stdin`, and the value of the earlier state is moved towards the value of
  # the state that followed it.
  #
  # @param dump_path [String] file that receives the Marshal dump of `values`
  # @return [void]
  def start(dump_path: DEFAULT_DUMP_PATH)
    world_state = world.clone_state(world.reset)
    # Make sure the opening position has an entry without clobbering a learned one.
    value_of(state: world_state)

    loop do
      puts "Turn #{world.turn_num}"
      old_state = world.clone_state(world_state)
      # The update must start from the estimate of the state being updated, so
      # it is looked up for every old_state (and before the world advances).
      old_value = value_of(state: old_state)
      world_state = world.clone_state(world.step(move: determine_move(state: old_state)))
      if world.ended?
        re_evaluate_values(state: world_state, old_value: old_value, old_state: old_state)
        break
      end

      old_state = world.clone_state(world_state)
      old_value = value_of(state: old_state)
      puts 'Waiting for your input'
      puts world.to_s
      human_move = world.parse_human_move(move_id: $stdin.gets,
                                          symbol: world.opposite_side(side: my_side))
      world_state = world.clone_state(world.step(move: human_move))
      re_evaluate_values(state: world_state, old_value: old_value, old_state: old_state)
      pp values
      break if world.ended?
    end

    File.open(dump_path, 'wb') { |file| Marshal.dump(values, file) }
    puts 'RESULT'
    puts world.to_s
  end

  private

  # Current estimate for `state`, creating the entry through #init_value when
  # the state has not been seen yet.
  def value_of(state:)
    values[state] || init_value(state: state)
  end

  # Moves the estimate of `old_state` a fraction `step_size` of the way from
  # `old_value` towards the estimate of `state` and stores the result.
  #
  # @return [Numeric] the value stored for `old_state`
  def re_evaluate_values(state:, old_value:, old_state:)
    next_value = value_of(state: state)
    values[old_state] = old_value + (step_size * (next_value - old_value))
  end

  # Stores and returns the starting estimate for `state`: 1 when this player
  # has won in it, 0 when the opponent has won or the world has ended, 0.5
  # otherwise.
  #
  # NOTE(review): `world.ended?` is asked of the live world, not of `state`, so
  # the 0 branch is only meaningful when `state` is the world's current state.
  def init_value(state:)
    values[state] =
      if win?(state: state)
        1
      elsif lose?(state: state) || world.ended?
        0
      else
        0.5
      end
  end

  # True when this player's side has won in `state`, according to the world.
  def win?(state:)
    world.won?(side: my_side, tmp_state: state)
  end

  # True when the opposing side has won in `state`, according to the world.
  def lose?(state:)
    world.won?(side: world.opposite_side(side: my_side), tmp_state: state)
  end

  # Picks the move whose resulting state has the highest estimate.
  #
  # The `state:` argument is accepted but not consulted: the candidates come
  # from `world.next_possible_moves_states`.
  def determine_move(state:)
    candidates = world.next_possible_moves_states
    # Debug output: estimates known so far for each candidate (nil when unseen).
    pp candidates.map { |candidate| values[candidate[:state]] }
    best = candidates.max_by { |candidate| value_of(state: candidate[:state]) }
    best[:move]
  end
end