import collections
import random
from typing import List, Optional

import numpy
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from yahtzee import Player, Round


class State:
    """Flat list of integers describing the game, fed to the network."""

    def __init__(self, data: Optional[List[int]] = None) -> None:
        # Default to None instead of []: a mutable default argument would be
        # shared across every State created without data.
        self.data = data if data is not None else []
        self.num_fields = len(self.data)

    def to_data(self) -> numpy.ndarray:
        return numpy.asarray(self.data)


class Agent:
    """Deep Q-Network agent."""

    def __init__(
        self,
        weights_path: str,
        input_size: int,
        output_size: int,
        epsilon: float = 1.0,
        learning_rate: float = 0.0005,
        first_layer_size: int = 50,
        second_layer_size: int = 300,
        third_layer_size: int = 50,
        memory_size: int = 2500,
        load_weights: bool = False,
    ) -> None:
        self.gamma = 0.9  # discount factor for future rewards
        self.short_memory = numpy.array([])
        self.learning_rate = learning_rate
        self.epsilon = epsilon  # exploration rate for epsilon-greedy selection
        self.first_layer = first_layer_size
        self.second_layer = second_layer_size
        self.third_layer = third_layer_size
        self.input_size = input_size
        self.output_size = output_size
        self.memory = collections.deque(maxlen=memory_size)  # replay buffer
        self.weights = weights_path
        self.load_weights = load_weights
        self.model = self.network()

    def network(self):
        """Build and compile the three-hidden-layer feed-forward network."""
        model = Sequential()
        # Input expects a shape tuple, hence the trailing comma.
        model.add(Input(shape=(self.input_size,)))
        model.add(Dense(self.first_layer, activation="relu"))
        model.add(Dense(self.second_layer, activation="relu"))
        model.add(Dense(self.third_layer, activation="relu"))
        model.add(Dense(self.output_size, activation="softmax"))
        opt = Adam(self.learning_rate)
        model.compile(loss="mse", optimizer=opt)
        if self.load_weights:
            model.load_weights(self.weights)
        return model

    def get_state(self, player: Player, round: Round) -> State:
        raise NotImplementedError("subclasses define their own state encoding")

    def get_reward(self, player: Player, round: Round) -> int:
        return player.total_score()

    def remember(
        self,
        state: State,
        action: numpy.ndarray,
        reward: int,
        next_state: State,
        done: bool,
    ):
        # Store the transition in the replay buffer for later batch training.
        self.memory.append(
            (state.to_data(), action, reward, next_state.to_data(), done)
        )

    def replay_new(self, memory, batch_size):
        """Train on a random minibatch drawn from the replay buffer."""
        if len(memory) > batch_size:
            minibatch = random.sample(memory, batch_size)
        else:
            minibatch = memory
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bellman target: reward plus the discounted best Q-value
                # of the next state.
                target = reward + self.gamma * numpy.amax(
                    self.model.predict(numpy.array([next_state]))[0]
                )
            target_f = self.model.predict(numpy.array([state]))
            # action is one-hot, so argmax recovers the chosen action index.
            target_f[0][numpy.argmax(action)] = target
            self.model.fit(numpy.array([state]), target_f, epochs=1, verbose=0)

    def train_short_memory(
        self,
        state: State,
        action: numpy.ndarray,
        reward: int,
        next_state: State,
        done: bool,
    ):
        """Train on a single transition immediately after it happens."""
        target = reward
        if not done:
            target = reward + self.gamma * numpy.amax(
                self.model.predict(
                    next_state.to_data().reshape((1, next_state.num_fields))
                )[0]
            )
        target_f = self.model.predict(state.to_data().reshape((1, state.num_fields)))
        target_f[0][numpy.argmax(action)] = target
        self.model.fit(
            state.to_data().reshape((1, state.num_fields)),
            target_f,
            epochs=1,
            verbose=0,
        )

    def predict(self, state_old: State) -> numpy.ndarray:
        """Epsilon-greedy action selection: with probability epsilon return a
        random one-hot action, otherwise the network's output for the state."""
        if random.uniform(0, 1) < self.epsilon:
            print("using random prediction")
            return to_categorical(
                random.randint(0, self.output_size - 1), num_classes=self.output_size
            )
        # predict action based on the old state
        prediction = self.model.predict(
            state_old.to_data().reshape((1, state_old.num_fields))
        )
        return prediction[0]


class RollAgent(Agent):
    """Agent whose state covers the scores, the dice, and the roll count."""

    def get_state(self, player: Player, round: Round) -> State:
        # Encode unscored slots as -1 so the network can distinguish them
        # from a genuine score of 0.
        state = [
            score if score is not None else -1 for score in player.current_scores()
        ]
        state.extend(round.dice)
        state.append(round.rolls)
        return State(state)


class SlotAgent(Agent):
    """Agent whose state covers the scores and the dice only."""

    def get_state(self, player: Player, round: Round) -> State:
        state = [
            score if score is not None else -1 for score in player.current_scores()
        ]
        state.extend(round.dice)
        return State(state)