import collections
import random
from typing import List, Optional

import numpy
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from yahtzee import Player, Round


class State:
    """Flat list of integers describing the game, fed to the network."""

    def __init__(self, data: Optional[List[int]] = None) -> None:
        # Default to None instead of []: a mutable default argument would be
        # shared across every State created without data.
        self.data = data if data is not None else []
        self.num_fields = len(self.data)

    def to_data(self) -> numpy.ndarray:
        return numpy.asarray(self.data)


class Agent:
    """Deep Q-Network agent."""

    def __init__(
        self,
        weights_path: str,
        input_size: int,
        output_size: int,
        epsilon: float = 1.0,
        learning_rate: float = 0.0005,
        first_layer_size: int = 50,
        second_layer_size: int = 300,
        third_layer_size: int = 50,
        memory_size: int = 2500,
        load_weights: bool = False,
    ) -> None:
        self.gamma = 0.9  # discount factor for future rewards
        self.short_memory = numpy.array([])
        self.learning_rate = learning_rate
        self.epsilon = epsilon  # exploration rate for epsilon-greedy selection
        self.first_layer = first_layer_size
        self.second_layer = second_layer_size
        self.third_layer = third_layer_size
        self.input_size = input_size
        self.output_size = output_size
        self.memory = collections.deque(maxlen=memory_size)  # replay buffer
        self.weights = weights_path
        self.load_weights = load_weights
        self.model = self.network()

    def network(self):
        """Build and compile the three-hidden-layer feed-forward network."""
        model = Sequential()
        # Input expects a shape tuple, hence the trailing comma.
        model.add(Input(shape=(self.input_size,)))
        model.add(Dense(self.first_layer, activation="relu"))
        model.add(Dense(self.second_layer, activation="relu"))
        model.add(Dense(self.third_layer, activation="relu"))
        model.add(Dense(self.output_size, activation="softmax"))
        opt = Adam(self.learning_rate)
        model.compile(loss="mse", optimizer=opt)
        if self.load_weights:
            model.load_weights(self.weights)
        return model

    def get_state(self, player: Player, round: Round) -> State:
        raise NotImplementedError("subclasses define their own state encoding")

    def get_reward(self, player: Player, round: Round) -> int:
        return player.total_score()

    def remember(
        self,
        state: State,
        action: numpy.ndarray,
        reward: int,
        next_state: State,
        done: bool,
    ):
        # Store the transition in the replay buffer for later batch training.
        self.memory.append(
            (state.to_data(), action, reward, next_state.to_data(), done)
        )

    def replay_new(self, memory, batch_size):
        """Train on a random minibatch drawn from the replay buffer."""
        if len(memory) > batch_size:
            minibatch = random.sample(memory, batch_size)
        else:
            minibatch = memory
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bellman target: reward plus the discounted best Q-value
                # of the next state.
                target = reward + self.gamma * numpy.amax(
                    self.model.predict(numpy.array([next_state]))[0]
                )
            target_f = self.model.predict(numpy.array([state]))
            # action is one-hot, so argmax recovers the chosen action index.
            target_f[0][numpy.argmax(action)] = target
            self.model.fit(numpy.array([state]), target_f, epochs=1, verbose=0)

    def train_short_memory(
        self,
        state: State,
        action: numpy.ndarray,
        reward: int,
        next_state: State,
        done: bool,
    ):
        """Train on a single transition immediately after it happens."""
        target = reward
        if not done:
            target = reward + self.gamma * numpy.amax(
                self.model.predict(
                    next_state.to_data().reshape((1, next_state.num_fields))
                )[0]
            )
        target_f = self.model.predict(state.to_data().reshape((1, state.num_fields)))
        target_f[0][numpy.argmax(action)] = target
        self.model.fit(
            state.to_data().reshape((1, state.num_fields)),
            target_f,
            epochs=1,
            verbose=0,
        )

    def predict(self, state_old: State) -> numpy.ndarray:
        """Epsilon-greedy action selection: with probability epsilon return a
        random one-hot action, otherwise the network's output for the state."""
        if random.uniform(0, 1) < self.epsilon:
            print("using random prediction")
            return to_categorical(
                random.randint(0, self.output_size - 1), num_classes=self.output_size
            )
        # predict action based on the old state
        prediction = self.model.predict(
            state_old.to_data().reshape((1, state_old.num_fields))
        )
        return prediction[0]


class RollAgent(Agent):
    """Agent whose state covers the scores, the dice, and the roll count."""

    def get_state(self, player: Player, round: Round) -> State:
        # Encode unscored slots as -1 so the network can distinguish them
        # from a genuine score of 0.
        state = [
            score if score is not None else -1 for score in player.current_scores()
        ]
        state.extend(round.dice)
        state.append(round.rolls)
        return State(state)


class SlotAgent(Agent):
    """Agent whose state covers the scores and the dice only."""

    def get_state(self, player: Player, round: Round) -> State:
        state = [
            score if score is not None else -1 for score in player.current_scores()
        ]
        state.extend(round.dice)
        return State(state)