Jerome Williams
July 1, 2023
In this notebook, I build a simple neural network that models optimal play in the game of tic-tac-toe.
I train the network on optimal-play data generated by a tic-tac-toe solver, which solves the game by backward induction (a complete search of the game tree).
The goal is to see how well a neural net, trained on an incomplete set of optimal-play data, can generalize to board states it has not been trained on. I am exploring this with a view to doing a similar exercise for the card game No Thanks! -- tic-tac-toe serves as a simple test case.
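The tictactoe and tictactoe_solver modules are not reproduced in this notebook. As a rough, self-contained sketch of what backward induction looks like for tic-tac-toe -- an approximation of the idea, not the solver's actual code -- the function below assumes a board encoded as a 9-tuple (0 = empty, 1 = player 1, 2 = player 2) and returns the game value from the active player's perspective along with one optimal move.
from functools import lru_cache
def winner(board):
    # Return 1 or 2 if that player has three in a row, else 0.
    lines = [(0, 1, 2), (3, 4, 5), (6, 7, 8),
             (0, 3, 6), (1, 4, 7), (2, 5, 8),
             (0, 4, 8), (2, 4, 6)]
    for a, b, c in lines:
        if board[a] != 0 and board[a] == board[b] == board[c]:
            return board[a]
    return 0
@lru_cache(maxsize=None)
def solve(board, player):
    # Backward induction (negamax): value is +1/0/-1 from the active
    # player's perspective; move is one optimal square index.
    w = winner(board)
    if w != 0:
        return (1 if w == player else -1), None
    moves = [i for i in range(9) if board[i] == 0]
    if not moves:  # board full: draw
        return 0, None
    best_value, best_move = -2, None
    for m in moves:
        child = board[:m] + (player,) + board[m + 1:]
        opp_value, _ = solve(child, 3 - player)  # zero-sum: negate
        if -opp_value > best_value:
            best_value, best_move = -opp_value, m
    return best_value, best_move
Thanks to the memoization, each reachable state is evaluated only once.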
First, let's generate the optimal play data used for training.
import tictactoe_solver
import tictactoe
import pandas as pd
game = tictactoe.TicTacToeGame()
solver = tictactoe_solver.TicTacToeSolver(game)
solver.solve()
data = solver.action_dict
df = pd.DataFrame(list(data.items()), columns=["state", "action"])
df[['player', 'board']] = df['state'].apply(pd.Series)
df.head()
| | state | action | player | board |
|---|---|---|---|---|
| 0 | (0, (1, 2, 1, 2, 1, 2, 2, 1, 0)) | 8 | 0 | (1, 2, 1, 2, 1, 2, 2, 1, 0) |
| 1 | (0, (1, 2, 1, 2, 1, 2, 0, 1, 2)) | 6 | 0 | (1, 2, 1, 2, 1, 2, 0, 1, 2) |
| 2 | (1, (1, 2, 1, 2, 1, 2, 0, 1, 0)) | 6 | 1 | (1, 2, 1, 2, 1, 2, 0, 1, 0) |
| 3 | (0, (1, 2, 1, 2, 1, 2, 0, 0, 0)) | 6 | 0 | (1, 2, 1, 2, 1, 2, 0, 0, 0) |
| 4 | (0, (1, 2, 1, 2, 1, 1, 2, 2, 0)) | 8 | 0 | (1, 2, 1, 2, 1, 1, 2, 2, 0) |
As we see above, each row in our data pairs a board state with the action our solver has determined to be optimal for that state. In principle, the `player` field (representing the active player) is redundant, since it can be deduced from the board state, but I plan to include it as a feature for the neural network anyway: while it is certainly possible for the net to learn the active player from the board, there is no need to insist on that when we can simply provide it as an additional input.
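As an illustration of that redundancy, the active player can be recovered from the board alone, assuming player 1 always moves first (this helper is my addition, not part of the tictactoe module):
def active_player(board):
    # Player 1 (encoded 0) is to move whenever both players have placed
    # equally many pieces; otherwise it is player 2's (encoded 1) turn.
    return 0 if board.count(1) == board.count(2) else 1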
Next, let's convert the state-action data into a form that can be fed into the PyTorch neural net model.
We split the board state into two 'channels' (one 1x9 vector for player 1's pieces, another 1x9 vector for player 2's pieces) and concatenate the two channels, along with the active player (0 and 1 representing player 1 and player 2, respectively), into a 1x19 vector. This is the input vector.
We also create a one-hot encoded version of the action field. This is the target vector.
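For concreteness, here is how the first row shown above would be encoded, written out by hand:
# state: player 0, board (1, 2, 1, 2, 1, 2, 2, 1, 0), optimal action 8
# player bit:        [0]
# player 1 channel:  [1, 0, 1, 0, 1, 0, 0, 1, 0]  (cells holding a 1)
# player 2 channel:  [0, 1, 0, 1, 0, 1, 1, 0, 0]  (cells holding a 2)
# input vector (19): [0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0]
# target (one-hot action 8): [0, 0, 0, 0, 0, 0, 0, 0, 1]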
import numpy as np
board_colnames = ['board_' + str(i) for i in range(9)]
df[board_colnames] = df['board'].apply(pd.Series)
action_dummies = pd.get_dummies(df['action'], prefix='action')
df2 = pd.concat([df, action_dummies], axis=1)
df2 = df2.drop(['board', 'action', 'state'], axis=1)
data_np = df2.to_numpy()
# Column 0 is the active player; columns 1-9 are the board cells.
current_player_data = data_np[:, 0:1]
player1_channel = (data_np[:, 1:10] == 1).astype(int)
player2_channel = (data_np[:, 1:10] == 2).astype(int)
# 1 + 9 + 9 = 19 input features per state.
state_data = np.concatenate([current_player_data, player1_channel, player2_channel], axis=1)
# The remaining columns are the one-hot action targets.
action_data = data_np[:, 10:]
sample_idx = np.random.choice(state_data.shape[0], 5, replace=False)
print(df2.iloc[sample_idx])
print(state_data[sample_idx,:])
print(action_data[sample_idx,:])
      player  board_0  board_1  board_2  board_3  board_4  board_5  board_6  \
1056       0        1        0        0        2        0        1        2
2800       1        2        0        1        0        1        2        2
3798       0        0        0        0        1        2        0        2
3507       0        2        0        2        1        0        0        1
1597       0        2        1        1        0        0        1        2

      board_7  board_8  action_0  action_1  action_2  action_3  action_4  \
1056        2        1         0         1         0         0         0
2800        1        1         0         0         0         1         0
3798        1        0         0         0         1         0         0
3507        2        1         0         1         0         0         0
1597        2        0         0         0         0         0         0

      action_5  action_6  action_7  action_8
1056         0         0         0         0
2800         0         0         0         0
3798         0         0         0         0
3507         0         0         0         0
1597         0         0         0         1

[[0 1 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 1 0]
 [1 0 0 1 0 1 0 0 1 1 1 0 0 0 0 1 1 0 0]
 [0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 1 0 0]
 [0 0 0 0 1 0 0 1 0 1 1 0 1 0 0 0 0 1 0]
 [0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 0 1 1 0]]

[[0 1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1]]
Next, we split our data into training and test datasets. We assign a random 80% of the data to the training dataset and 20% to the test dataset. We have 3,616 rows in the training data and 904 rows in the test data, for a total of 4,520 rows (corresponding to the number of valid non-terminal board states).
train_test_split = 0.8
train_idx = np.random.choice(state_data.shape[0], int(state_data.shape[0] * train_test_split), replace=False)
train_state_data = state_data[train_idx]
train_action_data = action_data[train_idx]
test_state_data = np.delete(state_data, train_idx, axis=0)
test_action_data = np.delete(action_data, train_idx, axis=0)
print(train_state_data.shape)
print(train_action_data.shape)
print(test_state_data.shape)
print(test_action_data.shape)
(3616, 19)
(3616, 9)
(904, 19)
(904, 9)
Next, we create a PyTorch Dataset for each of the training and test sets.
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
class TicTacToeDataset(Dataset):
def __init__(self, state_data, action_data):
self.state_data = state_data
self.action_data = action_data
def __len__(self):
return self.state_data.shape[0]
def __getitem__(self, idx):
return self.state_data[idx, :], self.action_data[idx, :]
train_ds = TicTacToeDataset(train_state_data, train_action_data)
test_ds = TicTacToeDataset(test_state_data, test_action_data)
batch_size = 32
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=True)
import torch.nn as nn
import torch.nn.functional as F
# Define the neural network
class NeuralNetwork(nn.Module):
def __init__(self, layer1_size, layer2_size):
super(NeuralNetwork, self).__init__()
self.fc1 = nn.Linear(19, layer1_size) # Fully connected layer 1
self.fc2 = nn.Linear(layer1_size, layer2_size) # Fully connected layer 2
self.fc3 = nn.Linear(layer2_size, 9) # Fully connected layer 3 (output layer)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
def train_model(train_dataloader, test_dataloader, n_epochs, layer1_size, layer2_size, lr=0.01):
# Create an instance of the neural network
model = NeuralNetwork(layer1_size, layer2_size)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# Training loop
train_losses = []
test_losses = []
for epoch in range(n_epochs):
train_running_loss = 0.0
model.train()
for inputs, targets in train_dataloader:
outputs = model(inputs.float())
train_loss = criterion(outputs, targets.float())
train_running_loss += train_loss.item()
optimizer.zero_grad()
train_loss.backward()
optimizer.step()
# Print loss at every 10th epoch
if (epoch+1) % 10 == 0:
avg_train_loss = train_running_loss / len(train_dataloader)
train_losses.append(avg_train_loss)
model.eval()
test_running_loss = 0.0
with torch.no_grad():
for inputs, targets in test_dataloader:
outputs = model(inputs.float())
test_loss = criterion(outputs, targets.float())
test_running_loss += test_loss.item()
avg_test_loss = test_running_loss / len(test_dataloader)
test_losses.append(avg_test_loss)
print(f'Epoch: {epoch+1}, Train Loss: {avg_train_loss:0.5f}, Test Loss: {avg_test_loss:0.5f}')
return model, train_losses, test_losses
n_epochs = 200
layer1_size = 1024
layer2_size = 1024
model, train_losses, test_losses = train_model(train_dataloader, test_dataloader, n_epochs, layer1_size, layer2_size)
Epoch: 10, Train Loss: 1.19745, Test Loss: 1.24235
Epoch: 20, Train Loss: 0.85133, Test Loss: 0.95503
Epoch: 30, Train Loss: 0.67318, Test Loss: 0.79425
Epoch: 40, Train Loss: 0.55312, Test Loss: 0.71450
Epoch: 50, Train Loss: 0.47098, Test Loss: 0.62849
Epoch: 60, Train Loss: 0.41272, Test Loss: 0.59147
Epoch: 70, Train Loss: 0.36733, Test Loss: 0.55804
Epoch: 80, Train Loss: 0.33168, Test Loss: 0.56320
Epoch: 90, Train Loss: 0.30116, Test Loss: 0.52988
Epoch: 100, Train Loss: 0.27625, Test Loss: 0.52523
Epoch: 110, Train Loss: 0.25317, Test Loss: 0.50994
Epoch: 120, Train Loss: 0.23210, Test Loss: 0.52215
Epoch: 130, Train Loss: 0.21351, Test Loss: 0.50712
Epoch: 140, Train Loss: 0.19699, Test Loss: 0.51592
Epoch: 150, Train Loss: 0.18181, Test Loss: 0.50446
Epoch: 160, Train Loss: 0.16726, Test Loss: 0.51125
Epoch: 170, Train Loss: 0.15374, Test Loss: 0.50968
Epoch: 180, Train Loss: 0.14128, Test Loss: 0.54236
Epoch: 190, Train Loss: 0.13176, Test Loss: 0.52450
Epoch: 200, Train Loss: 0.12027, Test Loss: 0.52291
As we can see, the train loss continues to decline, while the test loss has plateaued around 0.5.
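Since train_model already collects the losses every 10 epochs, we can plot the two curves to see the divergence. This plotting snippet is an addition on my part, assuming matplotlib is available:
import matplotlib.pyplot as plt
epochs = [10 * (i + 1) for i in range(len(train_losses))]  # losses were recorded every 10 epochs
plt.plot(epochs, train_losses, label='train loss')
plt.plot(epochs, test_losses, label='test loss')
plt.xlabel('epoch')
plt.ylabel('cross-entropy loss')
plt.legend()
plt.show()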
Let's look at entries where this model does poorly. One thing to note is that, in tic-tac-toe, there can be multiple optimal moves for a given board state. Since our training data contains only a single optimal action per state, we may be counting as errors some predictions that nevertheless constitute optimal play.
To deal with this, the solver can also output the set of all optimal moves for each board state. We can then check the neural net's prediction for each board state and count how many fall outside the set of all optimal actions for that state.
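The multiple_actions_dict is built inside tictactoe_solver, which isn't shown here. Conceptually, it maps each state to the set of all moves achieving the minimax value; a hypothetical sketch, reusing the solve function from the solver sketch above:
def all_optimal_moves(board, player):
    # Score every legal move and keep those attaining the best value.
    values = {}
    for m in (i for i in range(9) if board[i] == 0):
        child = board[:m] + (player,) + board[m + 1:]
        opp_value, _ = solve(child, 3 - player)
        values[m] = -opp_value
    best = max(values.values())
    return {m for m, v in values.items() if v == best}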
Let's do this for all states in the test set.
multiple_actions_dict = solver.multiple_actions_dict
def state_from_mat(mat):
player_data = mat[0]
p1_data = mat[1:10]
p2_data = mat[10:19]
return (player_data, tuple(p1_data * 1 + p2_data * 2))
inputs, targets = test_ds[:]
outputs = model(torch.from_numpy(inputs).float())
probabilities = F.softmax(outputs, dim=1)
predict_actions = torch.argmax(probabilities, dim=1)
# A prediction counts as an error only if it falls outside the full set
# of optimal actions for its state.
error_idx = []
for i in range(len(inputs)):
    predict_action = predict_actions[i].item()
    if predict_action not in multiple_actions_dict[state_from_mat(inputs[i])]:
        error_idx.append(i)
n_errors = len(error_idx)
print(f'We have {n_errors} errors out of {len(inputs)} test samples.')
We have 102 errors out of 904 test samples.
Let's examine a few of the states where the model errs (102 errors out of 904 samples is an error rate of roughly 11%).
for i in range(5):
state = state_from_mat(inputs[error_idx[i], :])
player, board = state
target = targets[error_idx[i], :]
probs = probabilities[error_idx[i], :].detach().numpy()
print(f'Example #{i+1}')
print('Active player: ' + str(player+1))
print('Board:')
print(str(board[0:3]) + "\n" + str(board[3:6]) + "\n" + str(board[6:9]))
print('Target (i.e. optimal action according to solver):')
print(str(target[0:3]) + "\n" + str(target[3:6]) + "\n" + str(target[6:9]))
print('Optimal action according to neural net:')
nn_action = np.zeros(9, dtype=int)
nn_action[predict_actions[error_idx[i]].item()] = 1
print(str(nn_action[0:3]) + "\n" + str(nn_action[3:6]) + "\n" + str(nn_action[6:9]))
print('Action probabilities according to neural net:')
print(str(probs[0:3]) + "\n" + str(probs[3:6]) + "\n" + str(probs[6:9]))
print()
Example #1
Active player: 2
Board:
(1, 2, 1)
(1, 0, 2)
(0, 0, 0)
Target (i.e. optimal action according to solver):
[0 0 0]
[0 0 0]
[1 0 0]
Optimal action according to neural net:
[0 0 0]
[0 1 0]
[0 0 0]
Action probabilities according to neural net:
[7.1795661e-08 4.7225498e-08 2.1284283e-07]
[4.5707097e-06 9.7259074e-01 7.7326625e-09]
[2.6755188e-02 6.3632248e-04 1.2866872e-05]

Example #2
Active player: 2
Board:
(1, 2, 1)
(0, 1, 2)
(2, 1, 0)
Target (i.e. optimal action according to solver):
[0 0 0]
[0 0 0]
[0 0 1]
Optimal action according to neural net:
[0 0 0]
[1 0 0]
[0 0 0]
Action probabilities according to neural net:
[9.5932783e-06 2.7670898e-05 4.0982563e-06]
[5.2594292e-01 1.9856998e-06 1.2750725e-06]
[4.2330477e-05 7.3809890e-05 4.7389629e-01]

Example #3
Active player: 1
Board:
(1, 2, 1)
(0, 1, 2)
(2, 0, 0)
Target (i.e. optimal action according to solver):
[0 0 0]
[0 0 0]
[0 0 1]
Optimal action according to neural net:
[0 0 0]
[1 0 0]
[0 0 0]
Action probabilities according to neural net:
[4.0669929e-05 4.2807475e-05 8.4560070e-06]
[8.0939001e-01 1.7846187e-07 2.8277625e-06]
[3.1919285e-06 4.2818155e-02 1.4769369e-01]

Example #4
Active player: 1
Board:
(1, 2, 1)
(0, 0, 2)
(2, 0, 1)
Target (i.e. optimal action according to solver):
[0 0 0]
[0 1 0]
[0 0 0]
Optimal action according to neural net:
[0 0 0]
[1 0 0]
[0 0 0]
Action probabilities according to neural net:
[7.4526392e-06 1.2831084e-05 2.3809184e-06]
[5.6145149e-01 4.3721297e-01 2.7234321e-07]
[5.8024239e-09 1.3119604e-03 6.4821285e-07]

Example #5
Active player: 2
Board:
(1, 2, 1)
(1, 0, 0)
(2, 0, 0)
Target (i.e. optimal action according to solver):
[0 0 0]
[0 0 0]
[0 1 0]
Optimal action according to neural net:
[0 0 0]
[0 1 0]
[0 0 0]
Action probabilities according to neural net:
[2.9910932e-08 5.3376266e-07 5.0426928e-07]
[2.9707589e-06 9.6668452e-01 5.4177106e-04]
[4.1024256e-07 2.6694551e-02 6.0745855e-03]
The five examples above show that the flagged errors are genuine mistakes: the neural net misses immediate winning moves (examples #3 and #4) or fails to block an immediate opponent win (examples #1 and #2). Only example #5 has any subtlety to it: it takes two further moves of lookahead to see that the neural net's action is suboptimal.
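We can make the "missed immediate win" observation mechanical with the winner helper from the solver sketch above (again, my addition, using the board's 1/2 piece encoding):
def immediate_wins(board, piece):
    # Squares where placing `piece` (1 or 2) completes three in a row.
    wins = set()
    for m in range(9):
        if board[m] == 0:
            child = board[:m] + (piece,) + board[m + 1:]
            if winner(child) == piece:
                wins.add(m)
    return wins
print(immediate_wins((1, 2, 1, 0, 1, 2, 2, 0, 0), 1))  # example #3 -> {8}, the move the net missed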
I have tried to improve the neural net's performance by increasing the size of the hidden layers and by adding an additional layer. But no matter what configuration I try, the test loss plateaus around 0.45-0.5 and the 'error' moves persist.
Although we can't attain perfect performance on the test data, I'm also interested in how well the model can perform on the training dataset itself. Typically, we don't want to optimize for performance on the training data, because doing so risks overfitting: the model latches onto specific characteristics of the training set and doesn't generalize well to other data.
However, in this case we have the entire dataset of optimal play, and we are interested in whether the optimal state-to-action mapping (call it the 'policy function') can be represented as a neural network at all.
So let's use the entire dataset as our training dataset and try to minimize the training loss.
First, let's set up our new dataset.
train_state_data = state_data
train_action_data = action_data
print(train_state_data.shape)
print(train_action_data.shape)
# Reuse the TicTacToeDataset class (and imports) defined above.
train_ds = TicTacToeDataset(train_state_data, train_action_data)
batch_size = 32
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
(4520, 19)
(4520, 9)
We will need to increase the number of training steps relative to what we used before. We will also decrease the size of the neural net.
# Reuse the NeuralNetwork class defined above.
def train_model(train_dataloader, n_epochs, layer1_size, layer2_size, lr=0.01):
# Create an instance of the neural network
model = NeuralNetwork(layer1_size, layer2_size)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# Training loop
train_losses = []
for epoch in range(n_epochs):
train_running_loss = 0.0
model.train()
for inputs, targets in train_dataloader:
            outputs = model(inputs.float())
train_loss = criterion(outputs, targets.float())
train_running_loss += train_loss.item()
optimizer.zero_grad()
train_loss.backward()
optimizer.step()
# Print loss at every 10th epoch
if (epoch+1) % 10 == 0:
avg_train_loss = train_running_loss / len(train_dataloader)
train_losses.append(avg_train_loss)
print(f'Epoch: {epoch+1}, Train Loss: {avg_train_loss:0.5f}')
return model, train_losses
n_epochs = 1200
layer1_size = 50
layer2_size = 50
model, train_losses = train_model(train_dataloader, n_epochs, layer1_size, layer2_size)
Epoch: 10, Train Loss: 1.92973
Epoch: 20, Train Loss: 1.47595
Epoch: 30, Train Loss: 1.24634
Epoch: 40, Train Loss: 1.07878
Epoch: 50, Train Loss: 0.90451
Epoch: 60, Train Loss: 0.78146
Epoch: 70, Train Loss: 0.69240
Epoch: 80, Train Loss: 0.62658
Epoch: 90, Train Loss: 0.57745
Epoch: 100, Train Loss: 0.53531
Epoch: 110, Train Loss: 0.49794
Epoch: 120, Train Loss: 0.46679
Epoch: 130, Train Loss: 0.43869
Epoch: 140, Train Loss: 0.41384
Epoch: 150, Train Loss: 0.39635
Epoch: 160, Train Loss: 0.37595
Epoch: 170, Train Loss: 0.36036
Epoch: 180, Train Loss: 0.34194
Epoch: 190, Train Loss: 0.32994
Epoch: 200, Train Loss: 0.31617
Epoch: 210, Train Loss: 0.30799
Epoch: 220, Train Loss: 0.29619
Epoch: 230, Train Loss: 0.28416
Epoch: 240, Train Loss: 0.27545
Epoch: 250, Train Loss: 0.26795
Epoch: 260, Train Loss: 0.26162
Epoch: 270, Train Loss: 0.25297
Epoch: 280, Train Loss: 0.24906
Epoch: 290, Train Loss: 0.23940
Epoch: 300, Train Loss: 0.23367
Epoch: 310, Train Loss: 0.22976
Epoch: 320, Train Loss: 0.22300
Epoch: 330, Train Loss: 0.21650
Epoch: 340, Train Loss: 0.21361
Epoch: 350, Train Loss: 0.20617
Epoch: 360, Train Loss: 0.20311
Epoch: 370, Train Loss: 0.19722
Epoch: 380, Train Loss: 0.19303
Epoch: 390, Train Loss: 0.19119
Epoch: 400, Train Loss: 0.18447
Epoch: 410, Train Loss: 0.18160
Epoch: 420, Train Loss: 0.17590
Epoch: 430, Train Loss: 0.16890
Epoch: 440, Train Loss: 0.16609
Epoch: 450, Train Loss: 0.16383
Epoch: 460, Train Loss: 0.15843
Epoch: 470, Train Loss: 0.15489
Epoch: 480, Train Loss: 0.15272
Epoch: 490, Train Loss: 0.14738
Epoch: 500, Train Loss: 0.14235
Epoch: 510, Train Loss: 0.13898
Epoch: 520, Train Loss: 0.13653
Epoch: 530, Train Loss: 0.13342
Epoch: 540, Train Loss: 0.12963
Epoch: 550, Train Loss: 0.12562
Epoch: 560, Train Loss: 0.12441
Epoch: 570, Train Loss: 0.12009
Epoch: 580, Train Loss: 0.11741
Epoch: 590, Train Loss: 0.11380
Epoch: 600, Train Loss: 0.11015
Epoch: 610, Train Loss: 0.10726
Epoch: 620, Train Loss: 0.10253
Epoch: 630, Train Loss: 0.10250
Epoch: 640, Train Loss: 0.09815
Epoch: 650, Train Loss: 0.09551
Epoch: 660, Train Loss: 0.09379
Epoch: 670, Train Loss: 0.08948
Epoch: 680, Train Loss: 0.08641
Epoch: 690, Train Loss: 0.08439
Epoch: 700, Train Loss: 0.08490
Epoch: 710, Train Loss: 0.08101
Epoch: 720, Train Loss: 0.08046
Epoch: 730, Train Loss: 0.07578
Epoch: 740, Train Loss: 0.07630
Epoch: 750, Train Loss: 0.07218
Epoch: 760, Train Loss: 0.06955
Epoch: 770, Train Loss: 0.06734
Epoch: 780, Train Loss: 0.06904
Epoch: 790, Train Loss: 0.06393
Epoch: 800, Train Loss: 0.06329
Epoch: 810, Train Loss: 0.06138
Epoch: 820, Train Loss: 0.05845
Epoch: 830, Train Loss: 0.05723
Epoch: 840, Train Loss: 0.05535
Epoch: 850, Train Loss: 0.05381
Epoch: 860, Train Loss: 0.05177
Epoch: 870, Train Loss: 0.05128
Epoch: 880, Train Loss: 0.04949
Epoch: 890, Train Loss: 0.04780
Epoch: 900, Train Loss: 0.04552
Epoch: 910, Train Loss: 0.04496
Epoch: 920, Train Loss: 0.04287
Epoch: 930, Train Loss: 0.04354
Epoch: 940, Train Loss: 0.04041
Epoch: 950, Train Loss: 0.03917
Epoch: 960, Train Loss: 0.03991
Epoch: 970, Train Loss: 0.03806
Epoch: 980, Train Loss: 0.03502
Epoch: 990, Train Loss: 0.03463
Epoch: 1000, Train Loss: 0.03355
Epoch: 1010, Train Loss: 0.03249
Epoch: 1020, Train Loss: 0.03191
Epoch: 1030, Train Loss: 0.03155
Epoch: 1040, Train Loss: 0.03008
Epoch: 1050, Train Loss: 0.02858
Epoch: 1060, Train Loss: 0.02801
Epoch: 1070, Train Loss: 0.02778
Epoch: 1080, Train Loss: 0.02653
Epoch: 1090, Train Loss: 0.02668
Epoch: 1100, Train Loss: 0.02508
Epoch: 1110, Train Loss: 0.02440
Epoch: 1120, Train Loss: 0.02439
Epoch: 1130, Train Loss: 0.02328
Epoch: 1140, Train Loss: 0.02256
Epoch: 1150, Train Loss: 0.02172
Epoch: 1160, Train Loss: 0.02146
Epoch: 1170, Train Loss: 0.02042
Epoch: 1180, Train Loss: 0.02046
Epoch: 1190, Train Loss: 0.01994
Epoch: 1200, Train Loss: 0.01966
As we did before, let's see how many errors we have in the neural net's predictions. However, this time we evaluate the neural network on the entire training dataset.
# Reuse the state_from_mat helper defined above.
inputs, targets = train_ds[:]
outputs = model(torch.from_numpy(inputs).float())
probabilities = F.softmax(outputs, dim=1)
predict_actions = torch.argmax(probabilities, dim=1)
# As before, count predictions outside the full set of optimal actions.
error_idx = []
for i in range(len(inputs)):
    predict_action = predict_actions[i].item()
    if predict_action not in multiple_actions_dict[state_from_mat(inputs[i])]:
        error_idx.append(i)
print(len(error_idx))
0
We have zero errors: the neural net has learned optimal play for every state in the dataset, so the optimal tic-tac-toe policy can indeed be represented by a network of this size.
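Finally, as a usage sketch: to actually play with the trained model, one reasonable approach is to encode the state as above and take the argmax over the empty squares only, since nothing in training guarantees that the raw argmax is a legal move. This helper is an illustration of mine, not code from the notebook above:
def predict_move(model, player, board):
    # Encode (player, board) exactly as in training: player bit + two channels.
    p1 = [int(c == 1) for c in board]
    p2 = [int(c == 2) for c in board]
    x = torch.tensor([[player] + p1 + p2], dtype=torch.float32)
    with torch.no_grad():
        logits = model(x)[0]
    # Mask occupied squares so the argmax is always a legal move.
    legal = torch.tensor([c == 0 for c in board])
    logits[~legal] = float('-inf')
    return int(torch.argmax(logits))
print(predict_move(model, 0, (0,) * 9))  # player 1's opening move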