import py2048
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

# define some parameters
epochs = 5000
input_size = 256        # 16 board cells, each one-hot encoded over 16 tile exponents
hidden_layer_1 = 450
hidden_layer_2 = 400
hidden_layer_3 = 200
output_size = 4         # one output per move direction
batch_size = 32
min_prob = 0.000001     # floor added inside log() so the loss never takes log(0)


class Net(nn.Module):
    """Feed-forward policy network: encoded board -> action probabilities."""

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_layer_1)
        self.fc2 = nn.Linear(hidden_layer_1, hidden_layer_2)
        self.fc3 = nn.Linear(hidden_layer_2, hidden_layer_3)
        self.fc4 = nn.Linear(hidden_layer_3, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        # x is a 1-D tensor (one flattened state), so softmax over dim=0
        return F.softmax(x, dim=0)


class PNAgent:
    def __init__(self, learning_rate, gamma):
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.rewards = np.array([])   # one terminal reward per recorded episode
        self.states = []
        self.actions = []
        self.probs = []
        self.model = Net().double()
        self.loss = torch.nn.NLLLoss()   # unused; train() builds its surrogate loss by hand
        self.opt = torch.optim.RMSprop(self.model.parameters(), lr=learning_rate)

    def add_episode(self, states, actions, probs, rewards):
        self.states.append(states)
        self.actions.append(actions)
        self.probs.append(probs)
        self.rewards = np.append(self.rewards, rewards)

    def act(self, state):
        """Sample a move from the policy's probability distribution for this board."""
        t = torch.from_numpy(self.encode(state.flatten())).type(torch.DoubleTensor)
        out = self.model(t)
        action = np.random.choice(4, 1, p=out.detach().numpy())[0]
        return action, out

    # taken from: https://gist.github.com/karpathy/a4166c7fe253700972fcbc77e4ea32c5
    # (currently unused: each episode below is recorded with a single terminal reward)
    def discount_rewards(self, rewards):
        """Discount rewards[t] by gamma^(T-1-t), counting back from the final step."""
        discounted_rewards = np.zeros_like(rewards)
        running_gamma = 1
        for t in reversed(range(0, rewards.size)):
            discounted_rewards[t] = rewards[t] * running_gamma
            running_gamma *= self.gamma
        return discounted_rewards

    def train(self):
        # standardize the episode rewards to zero mean and unit variance
        R = np.vstack(self.rewards)
        R -= np.mean(R)
        R = R / np.std(R)

        for i in range(len(self.states)):
            r = torch.from_numpy(R[i]).type(torch.DoubleTensor)
            for j in range(len(self.states[i])):
                state = torch.from_numpy(self.encode(self.states[i][j].flatten())).type(torch.DoubleTensor)
                action = torch.from_numpy(np.array(self.actions[i][j])).type(torch.DoubleTensor)

                self.opt.zero_grad()
                # REINFORCE surrogate loss: -log pi(a|s) * R, selecting the taken
                # action via its one-hot vector; min_prob guards against log(0)
                loss = -torch.sum(action * torch.log(self.model(state) + min_prob)) * r
                loss.backward()
                self.opt.step()

        self.states, self.probs, self.actions, self.rewards = [], [], [], np.array([])

    def load(self, name):
        # restore/persist parameters with PyTorch state dicts
        self.model.load_state_dict(torch.load(name))

    def save(self, name):
        torch.save(self.model.state_dict(), name)

    def encode(self, state):
        """One-hot encode each cell's tile exponent over 16 slots (16 cells * 16 = 256)."""
        new_state = np.array([])
        for block in state:
            one_hot = [0] * 16
            if block != 0:
                one_hot[int(block)] = 1
            new_state = np.append(new_state, one_hot)
        return new_state


if __name__ == "__main__":
    agent = PNAgent(0.01, 0.95)
    env = py2048.GameBoard(4, 4)
    avg_scores = []

    for epoch in range(epochs):
        scores = []
        for game in range(batch_size):
            states = []
            actions = []
            probs = []

            # play one game to completion, recording the full trajectory
            while True:
                action, prob = agent.act(env.board)
                state, _, done = env.step(action)
                states.append(state)
                one_hot = [0] * 4
                one_hot[action] = 1
                actions.append(one_hot)
                probs.append(prob)

                if done:
                    scores.append(env.score)
                    # the episode's reward is the final sum of tile values on the board
                    agent.add_episode(states, actions, probs, np.sum(env.exponentiate()))
                    env.reset()
                    break
        agent.train()
        avg_scores.append(np.mean(scores))
        print("epoch:", epoch, "mean_score:", avg_scores[-1])

    plt.plot(avg_scores)
    plt.xlabel("Batch")
    plt.ylabel("Average terminal sum of tiles")
    plt.savefig("avg_board.png")  # save before show(); saving afterwards writes a blank figure
    plt.show()
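The listing depends on an external py2048 module that is not shown. For completeness, here is a minimal, hypothetical stand-in consistent with how the script calls it: GameBoard(rows, cols) exposing .board, .score, .reset(), .step(action) returning (state, reward, done), and .exponentiate() converting stored exponents back to tile values. The action-to-direction mapping and the tile spawn odds are assumptions, not the real package's behavior.

# hypothetical stand-in for py2048 -- save as py2048.py next to the script
import numpy as np

class GameBoard:
    def __init__(self, rows, cols):
        self.rows, self.cols = rows, cols
        self.reset()

    def reset(self):
        # cells hold tile exponents (0 = empty, k = tile 2**k), matching encode()
        self.board = np.zeros((self.rows, self.cols), dtype=np.int64)
        self.score = 0
        self._spawn()
        self._spawn()
        return self.board

    def _spawn(self):
        empty = np.argwhere(self.board == 0)
        if len(empty):
            r, c = empty[np.random.randint(len(empty))]
            self.board[r, c] = 1 if np.random.rand() < 0.9 else 2  # tile 2 or 4

    def _slide_row(self, row):
        # slide/merge one row to the left; return (new_row, points_gained)
        tiles = [x for x in row if x]
        out, gained, i = [], 0, 0
        while i < len(tiles):
            if i + 1 < len(tiles) and tiles[i + 1] == tiles[i]:
                out.append(tiles[i] + 1)          # merged tile: exponent + 1
                gained += 2 ** (tiles[i] + 1)
                i += 2
            else:
                out.append(tiles[i])
                i += 1
        return out + [0] * (len(row) - len(out)), gained

    def step(self, action):
        # rotate the board so every move (0..3) reduces to "slide left";
        # moves that change nothing leave the board as-is and spawn no tile
        rotated = np.rot90(self.board, action)
        rows, gains = zip(*(self._slide_row(list(r)) for r in rotated))
        new_board = np.rot90(np.array(rows), -action)
        if not np.array_equal(new_board, self.board):
            self.board = new_board
            self.score += sum(gains)
            self._spawn()
        done = not self._can_move()
        return self.board, self.score, done

    def _can_move(self):
        for a in range(4):
            for r in np.rot90(self.board, a):
                if self._slide_row(list(r))[0] != list(r):
                    return True
        return False

    def exponentiate(self):
        # convert stored exponents back to tile values (empty cells stay 0)
        return np.where(self.board > 0, 2 ** self.board, 0)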