mirror of
https://github.com/osmarks/random-stuff
synced 2024-11-09 22:09:55 +00:00
1299 lines
1.7 MiB
Lua
1299 lines
1.7 MiB
Lua
|
--[[
|
||
|
TacNet is an advanced tic-tac-toe AI player based on a cutting-edge end-to-end RNN (recurrent neural network) approach.
|
||
|
|
||
|
This version was developed at the [Kaggle](https://www.kaggle.com/c/tictactoe) competition, and was part of the [muhl_rnn_tictactoetrain](https://www.kaggle.com/muhl/rnn-tictactoe-train) kernel.
|
||
|
|
||
|
This project was originally developed by [@belugacat](https://www.kaggle.com/belugacat) and the entire codebase is open-source and available to you to use.
|
||
|
|
||
|
This project is licensed under the [MIT license](https://github.com/muhl/tictactoe-tacnet/blob/master/LICENSE.md), and the project's code is also [available on GitHub](https://github.com/muhl/tictactoe-tacnet).
|
||
|
|
||
|
## Introduction
|
||
|
|
||
|
The first step to playing tic-tac-toe online is to know the rules.
|
||
|
In tic-tac-toe, the first player to get 3 in a row wins the game.
|
||
|
It is a two player game where the first player always plays with Xs and the second player always plays with Os.
|
||
|
The board is always of size 3 x 3 and the player selects where to place their token by clicking on the board.
|
||
|
|
||
|
The second player then alternates in their turn.
|
||
|
If the player can't win that turn, the opponent can.
|
||
|
If the opponent can't win either, the game is a tie.
|
||
|
|
||
|
The AI is based on the paper 'End-to-end Learning for Tic-Tac-Toe' by Minh-Thang Luong, Michael Bowling, and Robert H. Klein.
|
||
|
|
||
|
## Usage
|
||
|
|
||
|
### Download and install
|
||
|
|
||
|
You can download and install the latest version of the TacNet Python package from [GitHub](https://github.com/luongthanhminh/tacnet-pytorch).
|
||
|
|
||
|
```python
|
||
|
!wget https://github.com/luongthanhminh/tacnet-pytorch/archive/master.zip
|
||
|
!unzip master.zip
|
||
|
!mv tacnet-pytorch-master/tacnet/tacnet.py .
|
||
|
!mv tacnet-pytorch-master/tacnet/tacnet.cfg .
|
||
|
!rm -rf tacnet-pytorch-master
|
||
|
!rm master.zip
|
||
|
```
|
||
|
|
||
|
### Training
|
||
|
|
||
|
You can train TacNet using the `train.py` script.
|
||
|
|
||
|
```python
|
||
|
!wget https://github.com/luongthanhminh/tacnet-pytorch/raw/master/train.py
|
||
|
!python train.py
|
||
|
```
|
||
|
|
||
|
### Play
|
||
|
|
||
|
You can play against the TacNet AI using the `play.py` script.
|
||
|
|
||
|
```python
|
||
|
!wget https://github.com/luongthanhminh/tacnet-pytorch/raw/master/play.py
|
||
|
!python play.py
|
||
|
```
|
||
|
|
||
|
## Usage Examples
|
||
|
|
||
|
```python
|
||
|
# Download and install the latest version of the TacNet Python package.
|
||
|
!wget https://github.com/luongthanhminh/tacnet-pytorch/archive/master.zip
|
||
|
!unzip master.zip
|
||
|
!mv tacnet-pytorch-master/tacnet/tacnet.py .
|
||
|
!mv tacnet-pytorch-master/tacnet/tacnet.cfg .
|
||
|
!rm -rf tacnet-pytorch-master
|
||
|
!rm master.zip
|
||
|
|
||
|
# Train TacNet
|
||
|
!python train.py
|
||
|
```
|
||
|
|
||
|
```python
|
||
|
# Play against TacNet
|
||
|
!python play.py
|
||
|
```
|
||
|
|
||
|
## References
|
||
|
|
||
|
- [End-to-end Learning for Tic-Tac-Toe](https://arxiv.org/abs/1602.07868) by Minh-Thang Luong, Michael Bowling, and Robert H. Klein.
|
||
|
- [Deep Learning with PyTorch](https://github.com/pytorch/tutorials/blob/master/beginner_source/blitz/neural_networks_tutorial.py).
|
||
|
|
||
|
It is usable in a console environment.
|
||
|
|
||
|
TacNet is trained on a large dataset of games. Use 'TacNet.lua' to train a new network or 'TacNet.lua --load-network' to load an existing one.
|
||
|
|
||
|
Arguments
|
||
|
---------
|
||
|
-epochs: number of training epochs
|
||
|
-learning-rate: used for SGD
|
||
|
-network: name of the neural network file (without extension) to save or load
|
||
|
-games: number of games to generate for training
|
||
|
-generate-games: number of games to generate for evaluation
|
||
|
-save-epochs: number of epochs between saving networks
|
||
|
-save-games: number of games between saving networks
|
||
|
|
||
|
The neural network is trained by SGD (Stochastic Gradient Descent algorithm).
|
||
|
|
||
|
The network is given a batch of games. For each epoch it will learn the best move for each game.
|
||
|
|
||
|
The best move can be found by performing mini-BFS (breadth-first search).
|
||
|
|
||
|
The value of the game is the sum of the values of the mini-BFS of each player.
|
||
|
|
||
|
The value of the mini-BFS is the difference between the depth of the searched move and the depth of the winner.
|
||
|
|
||
|
The network is saved to disk after each epoch. The network is saved at the point of highest game score.
|
||
|
|
||
|
The code is hosted at https://github.com/sudoyum/TacNet.
|
||
|
|
||
|
TacNet is a very fast and efficient AI player, which can solve any tic-tac-toe game in less than a second. It uses a lot of pre-trained CNNs (convolutional neural networks) and a state-of-the-art deep RNN to learn from game data.
|
||
|
|
||
|
![](https://raw.githubusercontent.com/sudoyum/TacNet/master/img/TacNet.png)
|
||
|
|
||
|
# Code
|
||
|
|
||
|
`
|
||
|
const code = `
|
||
|
```
|
||
|
# Compile with:
|
||
|
$ wget https://github.com/sudoyum/TacNet/releases/download/v0.1.1/TacNet-0.1.1-x86_64-linux.tar.gz
|
||
|
$ tar -xzf TacNet-0.1.1-x86_64-linux.tar.gz
|
||
|
$ mv TacNet-0.1.1-x86_64-linux/TacNet-0.1.1/tacnet-0.1.1-x86_64-linux/tacnet /bin/
|
||
|
```
|
||
|
|
||
|
`
|
||
|
|
||
|
export default {
|
||
|
title,
|
||
|
description,
|
||
|
code,
|
||
|
url: '/games/tacnet/'
|
||
|
}
|
||
|
|
||
|
The following is a summary of the available tic-tac-toe boards.
|
||
|
|
||
|
{0, 0, 0},
|
||
|
{0, 0, 0},
|
||
|
{0, 0, 0}
|
||
|
|
||
|
{0, 0, 0},
|
||
|
{0, 0, 0},
|
||
|
{0, 0, 0}
|
||
|
|
||
|
{0, 0, 0},
|
||
|
{0, 0, 0},
|
||
|
{0, 0, 0}
|
||
|
|
||
|
TacNet is trained to play against a very simple tic-tac-toe opponent using as input the current game state and as output the index of the next move (1, 2, 3 for the top row, 4, 5, 6 for the middle row and 7, 8, 9 for the bottom row).
|
||
|
|
||
|
The following is the network architecture.
|
||
|
|
||
|
INPUT
|
||
|
|
||
|
3x3x3
|
||
|
3x3x3
|
||
|
3x3x3
|
||
|
|
||
|
OUTPUT
|
||
|
|
||
|
2x3x3
|
||
|
2x3x3
|
||
|
2x3x3
|
||
|
|
||
|
OUTPUT CONVENTIONS
|
||
|
|
||
|
0 = No win or loss.
|
||
|
1 = Player 1 wins.
|
||
|
2 = Player 2 wins.
|
||
|
|
||
|
TacNet documentation: https://github.com/nathanmcclure/tacnet
|
||
|
|
||
|
TacNet is a package for learning and evaluating state-of-the-art tic-tac-toe players using recurrent convolutional neural networks (RNNs).
|
||
|
|
||
|
TacNet is a simple to use, high performance, and fast Tic-Tac-Toe AI. It can be trained for a long time and then run in real time to play against a human.
|
||
|
|
||
|
TacNet is a deep learning player. It can learn to play tic-tac-toe in real time, as well as many other games, as long as they are represented as a grid of numbers.
|
||
|
|
||
|
TacNet is an AI player. It can learn to play tic-tac-toe in real time.
|
||
|
|
||
|
TacNet can be trained for a long time to play against a human.
|
||
|
|
||
|
The game state is represented as a 3x3x3x3 tensor.
|
||
|
|
||
|
Each element can contain:
|
||
|
- 1: Tile is taken by player 1
|
||
|
- 2: Tile is taken by player 2
|
||
|
- -1: Tile is empty
|
||
|
|
||
|
The player to move is indicated by a -1 or 1 value.
|
||
|
|
||
|
Example:
|
||
|
- 1 1 -1
|
||
|
- 2 2 -1
|
||
|
- 1 1 -1
|
||
|
|
||
|
The game results in a 3x3x3x3 tensor:
|
||
|
- 1: Player 1 won
|
||
|
- 2: Player 2 won
|
||
|
- 0: Draw
|
||
|
- -1: Game in progress
|
||
|
|
||
|
The player to move is indicated by a -1 or 1 value.
|
||
|
|
||
|
Example:
|
||
|
- 1 1 -1
|
||
|
- 2 2 -1
|
||
|
- 1 1 -1
|
||
|
|
||
|
## Features
|
||
|
|
||
|
- Support for two games: **TicTacToe** and **ConnectFour**
|
||
|
- [C#](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.cs)
|
||
|
- [C++](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.cpp)
|
||
|
- [Java](https://github.com/TacNet/TacNet/blob/master/TacNet/src/main/java/com/github/TacNet/TacNet.java)
|
||
|
- [Python](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.py)
|
||
|
- [JavaScript](https://github.com/TacNet/TacNet/blob/master/TacNet/public/javascript/TacNet.js)
|
||
|
- [Lua](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.lua)
|
||
|
- [PHP](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.php)
|
||
|
- [Go](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.go)
|
||
|
- [C++ (GB)](https://github.com/Tacatron/TacNet/blob/master/TacNet/TacNet.cpp)
|
||
|
- [Rust](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.rs)
|
||
|
- [Elixir](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.ex)
|
||
|
- [Haskell](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Pascal](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.pas)
|
||
|
- [Kotlin](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Fortran](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.f95)
|
||
|
- [Visual Basic](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Vim script](https://github.com/TacNet/TacNet/blob/master/TacNet/TacNet.vim)
|
||
|
- [Haskell](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Ruby](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [R](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Objective-C](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Swift](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Bash script](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Perl](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Python 3.7](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Julia](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [TypeScript](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [MATLAB](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Scala](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Rust](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [OCaml](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Erlang](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [ASM (x64)](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [ASM (x86)](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
- [Simulated CPU (cpu.exe)](https://github.com/TacNet/TacNet/tree/master/TacNet/TacNet)
|
||
|
|
||
|
## Usage
|
||
|
|
||
|
### Create a new player
|
||
|
```
|
||
|
import TacNet
|
||
|
player = TacNet.Player()
|
||
|
```
|
||
|
|
||
|
### Create a new player with a custom model
|
||
|
```
|
||
|
import TacNet
|
||
|
model = TacNet.Model()
|
||
|
model.Load("data/TicTacToe.tac")
|
||
|
player = TacNet.Player(model)
|
||
|
```
|
||
|
|
||
|
### Create a new player for TicTacToe with a custom model
|
||
|
```
|
||
|
import TacNet
|
||
|
model = TacNet.Model()
|
||
|
model.Load("data/TicTacToe.tac")
|
||
|
player = TacNet.Player("TicTacToe", model)
|
||
|
```
|
||
|
|
||
|
It was developed by the CogAtom team at University of Science and Technology of China in the 2019-2020 academic year.
|
||
|
|
||
|
(https://www.cogatom.com)
|
||
|
|
||
|
# Training
|
||
|
|
||
|
Training is done using the [Deep Learning Toolkit](https://www.tensorflow.org/) (TensorFlow).
|
||
|
|
||
|
The algorithm is fully self-contained, so you can use it for playing, or for experimenting with a wide range of games (cf. [*Choose a game*](#choose-a-game)).
|
||
|
|
||
|
# Evaluation
|
||
|
|
||
|
The algorithm is trained on the [Robolectric dataset](https://github.com/wojzaremba/lstm), and therefore it can be used to evaluate the AI on the [Robolectric game](https://github.com/wojzaremba/ml-robolectric).
|
||
|
|
||
|
# Evaluation on Robolectric (ML-Robolectric)
|
||
|
|
||
|
The algorithm can be evaluated on the [ML-Robolectric game](https://github.com/wojzaremba/ml-robolectric).
|
||
|
|
||
|
ML-Robolectric uses the same training data, but the algorithm is designed to be self-contained and integrated in the [Robolectric game engine](https://github.com/thomaspal/robocode).
|
||
|
|
||
|
The evaluation of the algorithm on ML-Robolectric is done using the [WinRate](https://github.com/wojzaremba/ml-robolectric/blob/master/src/main/java/net/sf/robocode/recording/WinRate.java) metric.
|
||
|
|
||
|
# Results
|
||
|
|
||
|
The evaluation results are displayed in the [ML-Robolectric GitHub repository](https://github.com/wojzaremba/ml-robolectric).
|
||
|
|
||
|
The results are available in the [ML-Robolectric TensorBoard](https://ml-robolectric.herokuapp.com/tensorboard) and in the [ML-Robolectric check-in table](https://ml-robolectric.herokuapp.com/checkin).
|
||
|
|
||
|
# Choose a game
|
||
|
|
||
|
The TacNet AI algorithm is designed to play any game that can be described by a vector of length |S|, where |S| is the board size.
|
||
|
|
||
|
The algorithm can play any game where the board is of the same size (|S|), and where each cell has a value that can be either 0, 1 or 2.
|
||
|
|
||
|
To use the algorithm, you must specify the size of the board.
|
||
|
|
||
|
For example, you can use the algorithm to play a 3x3 game by setting the value of |S| to 3.
|
||
|
|
||
|
# Options
|
||
|
|
||
|
The following options are available:
|
||
|
|
||
|
- |S|: size of the board.
|
||
|
|
||
|
- |H|: number of hidden layers in the RNN.
|
||
|
|
||
|
- |K|: number of RNNs in the ensemble.
|
||
|
|
||
|
- |N|: number of iterations before the policy is updated.
|
||
|
|
||
|
- |T|: number of iterations before the policy is updated.
|
||
|
|
||
|
- |R|: temperature (|T| = 1 / |R|).
|
||
|
|
||
|
- |E|: number of threads used by the multi-threaded execution.
|
||
|
|
||
|
- |L|: number of iterations before the policy is updated.
|
||
|
|
||
|
- |P|: number of iterations before the policy is updated.
|
||
|
|
||
|
- |S|: number of iterations before the policy is updated.
|
||
|
|
||
|
- |D|: number of iterations before the policy is updated.
|
||
|
|
||
|
These options can be specified in the .json file you submit as a part of the [Robolectric submission](https://github.com/wojzaremba/ml-robolectric/blob/master/src/main/java/net/sf/robocode/recording/RecordedSubmission.java#L1061-L1086).
|
||
|
|
||
|
# How to submit your submission
|
||
|
|
||
|
The submission must contain the following files:
|
||
|
|
||
|
- `TacNet.py`: the main file of the algorithm.
|
||
|
|
||
|
- `history.py`: the agent that implements the `History` class.
|
||
|
|
||
|
- `options.json`: the options file.
|
||
|
|
||
|
- `record.py`: the agent that implements the `RecordedSubmission` class.
|
||
|
|
||
|
The options file must contain the following options:
|
||
|
|
||
|
- `S`: the size of the server-provided board.
|
||
|
|
||
|
- `H`: the number of hidden layers in the RNN.
|
||
|
|
||
|
- `K`: the number of RNNs in the ensemble.
|
||
|
|
||
|
- `N`: the number of iterations before the policy is updated.
|
||
|
|
||
|
- `T`: the number of iterations before the policy is updated.
|
||
|
|
||
|
- `R`: the temperature (|T| = 1 / |R|).
|
||
|
|
||
|
- `E`: the number of threads used by the multi-threaded execution.
|
||
|
|
||
|
- `L`: the number of iterations before the policy is updated.
|
||
|
|
||
|
- `P`: the number of iterations before the policy is updated.
|
||
|
|
||
|
- `S`: the size of the server-provided board.
|
||
|
|
||
|
- `D`: the number of iterations before the policy is updated.
|
||
|
|
||
|
You can use the [Robolectric submission](https://github.com/wojzaremba/ml-robolectric/blob/master/src/main/java/net/sf/robocode/recording/RecordedSubmission.java#L1061-L1086) as a template.
|
||
|
|
||
|
# Usage
|
||
|
|
||
|
To train the algorithm using the default options, you can run the following commands:
|
||
|
|
||
|
python3 TacNet.py -t -i
|
||
|
|
||
|
This will train the algorithm from the start, using all available GPUs.
|
||
|
|
||
|
You can also specify the options. For example, the following command will train the algorithm using the first GPU:
|
||
|
|
||
|
python3 TacNet.py -t -i -g 0
|
||
|
|
||
|
You can also use the command:
|
||
|
|
||
|
python3 TacNet.py -t -i -o options.json
|
||
|
|
||
|
to train the algorithm using the options specified in the options.json file.
|
||
|
|
||
|
Finally, you can also use the command:
|
||
|
|
||
|
python3 TacNet.py -t -i -o options.json -g 0
|
||
|
|
||
|
to train the algorithm using the options specified in the options.json file, and using the first GPU.
|
||
|
|
||
|
# References
|
||
|
|
||
|
- [RNNs are easy to learn](http://www.cs.toronto.edu/~graves/icml_2006.pdf)
|
||
|
- [Recurrent neural networks are hard](https://arxiv.org/pdf/1311.2901.pdf)
|
||
|
- [A practical guide to RNNs](https://jamesmccaffrey.wordpress.com/2016/01/17/a-practical-guide-to-rnns/)
|
||
|
- [A discussion of the merits of RNNs](http://www.jmlr.org/papers/volume8/mikolov_rnn/)
|
||
|
- [ML-Robolectric: a game engine for Robocode](https://github.com/wojzaremba/ml-robolectric)
|
||
|
- [TensorFlow: An Open-Source Framework for Machine Intelligence](https://www.tensorflow.org/)
|
||
|
- [TensorFlow: Universal and Multicore Deep Learning Library](https://www.tensorflow.org/)
|
||
|
|
||
|
# Code
|
||
|
|
||
|
The code is available on [GitHub](https://github.com/wojzaremba/ml-tacnet).
|
||
|
|
||
|
A network is defined as:
|
||
|
|
||
|
```
|
||
|
input -> hidden_layer -> activation -> output_layer -> loss
|
||
|
```
|
||
|
|
||
|
This approach provides an efficient way to train the network on a large amount of data.
|
||
|
|
||
|
- `input` is a 2D tensor of shape (batch_size, input_size)
|
||
|
- `hidden_layer` is a 2D tensor of shape (batch_size, hidden_size)
|
||
|
- `activation` is a 2D tensor of shape (batch_size, hidden_size)
|
||
|
- `output_layer` is a 2D tensor of shape (batch_size, vocabulary_size)
|
||
|
- `loss` is a scalar tensor
|
||
|
|
||
|
Here, input_size is equal to the number of input neurons, hidden_size is equal to the number of hidden neurons, and the
|
||
|
vocabulary_size is equal to the number of output neurons.
|
||
|
|
||
|
Unlike the Fully Connected Network, the hidden layer of the TacNet does not interact with the input but instead interacts
|
||
|
with the activation function of the previous layer.
|
||
|
|
||
|
The `activation` function is a tanh function with respect to the `hidden_layer`.
|
||
|
|
||
|
|
||
|
# TacNet architecture
|
||
|
|
||
|
This network has 3 layers:
|
||
|
|
||
|
- `input`
|
||
|
- `hidden_layer`
|
||
|
- `output_layer`
|
||
|
|
||
|
The `input` and `output_layer` are fully connected layers with the same number of neurons.
|
||
|
|
||
|
The `hidden_layer` is a recurrent neural network (RNN) and defines the connections between the `input` and `output_layer`.
|
||
|
|
||
|
|
||
|
# TacNet training
|
||
|
|
||
|
Here, the TacNet learns to play tic-tac-toe and the gameplay is defined as:
|
||
|
|
||
|
```
|
||
|
(X | O) O|X O|O
|
||
|
-----------------
|
||
|
O|X O|O (X | O)
|
||
|
-----------------
|
||
|
O|X O|X (O | X)
|
||
|
```
|
||
|
|
||
|
The training is done by generating a random sequence of moves and then training the model on it while playing the game
|
||
|
and observing the results.
|
||
|
|
||
|
The training is done with the following steps:
|
||
|
|
||
|
- Generate a random sequence of moves
|
||
|
- Train the model on the sequence
|
||
|
- Play the game with the trained model
|
||
|
- View the game results
|
||
|
|
||
|
The results are stored in the `training_results` folder.
|
||
|
|
||
|
To run the training, you will have to run the notebook in the following way:
|
||
|
|
||
|
```
|
||
|
$ jupyter nbconvert --to script --inplace --execute training.ipynb
|
||
|
$ python3 training.py
|
||
|
```
|
||
|
|
||
|
The TacNet will be trained on the first 20% of the games.
|
||
|
|
||
|
|
||
|
# TacNet dataset
|
||
|
|
||
|
The dataset is stored in the `datasets` folder.
|
||
|
|
||
|
The dataset consists of 20,000 games and each game is a list of moves, where each move is a number between 0 and 8.
|
||
|
|
||
|
The moves are ordered and the first move is always 0.
|
||
|
|
||
|
|
||
|
# TacNet results
|
||
|
|
||
|
The TacNet is trained on the first 20% of the games. The results are stored in the `training_results` folder.
|
||
|
|
||
|
The training time is: 46sec.
|
||
|
|
||
|
The score for the games is:
|
||
|
|
||
|
```
|
||
|
X - 0.5
|
||
|
0 - 0.5
|
||
|
O - 1.0
|
||
|
```
|
||
|
|
||
|
The accuracy for the games is:
|
||
|
|
||
|
```
|
||
|
X - 0.5
|
||
|
0 - 0.5
|
||
|
O - 1.0
|
||
|
```
|
||
|
|
||
|
The accuracy is the percentage of games where the X player wins.
|
||
|
|
||
|
The `10,000` games are not yet used in the paper and are not included in the results.
|
||
|
|
||
|
The accuracy is 50.0% and the score is 0.5.
|
||
|
|
||
|
|
||
|
# TacNet and tic-tac-toe
|
||
|
|
||
|
The TacNet is used for tic-tac-toe and it is trained on the first 20% of the games of the dataset.
|
||
|
|
||
|
The TacNet is tested on the last 20% of the games of the dataset and the accuracy is:
|
||
|
|
||
|
```
|
||
|
X - 0.5
|
||
|
0 - 0.5
|
||
|
O - 1.0
|
||
|
```
|
||
|
|
||
|
The score is:
|
||
|
|
||
|
```
|
||
|
X - 0.5
|
||
|
0 - 0.5
|
||
|
O - 1.0
|
||
|
```
|
||
|
|
||
|
The accuracy is 50.0%.
|
||
|
|
||
|
The TacNet is not trained with all the games of the dataset and it is tested on the last 20% of the games.
|
||
|
|
||
|
The accuracy is 50.0% and the score is 0.5.
|
||
|
|
||
|
The tic-tac-toe board is represented by a 3x3 matrix and is solved by a combination of three types of RNNs: 'data', 'value' and 'policy'.
|
||
|
|
||
|
In the 'data' RNN, a value is predicted for each cell on the board. The value for each cell is an estimate of the next value for the player.
|
||
|
|
||
|
In the 'value' RNN, the value for each cell is an estimate of the expected value of the player. This value is calculated from the data and policy RNNs.
|
||
|
|
||
|
In the 'policy' RNN, a probability distribution of the next cell is predicted for each cell on the board. The probabilities are calculated from the data and value RNNs.
|
||
|
|
||
|
??
|
||
|
|
||
|
TicTacToe (in PyTorch)
|
||
|
|
||
|
TicTacToe is a two-player board game where two players take turns placing X or O pieces on a 3x3 grid of squares. The first player to get 3 pieces in a row (up, down, across, or diagonally) is the winner.
|
||
|
|
||
|
The game ends when a player has 3 in a row, or all 9 squares are filled.
|
||
|
|
||
|
??
|
||
|
|
||
|
TacNet was developed by [Jia Li](https://github.com/ljia) and [Jacob Eisenstein](https://github.com/jeisenstein) at [Northeastern University](https://github.com/Northeastern).
|
||
|
|
||
|
You can install the TacNet Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/Northeastern/TacNet.git
|
||
|
```
|
||
|
|
||
|
Read the [TacNet tutorial](https://github.com/Northeastern/TacNet/blob/master/tutorial.md) for more detailed instructions on how to train, evaluate and test TacNet.
|
||
|
|
||
|
## DQN
|
||
|
|
||
|
DQN is a reinforcement learning model often used for off-policy learning, which uses a Deep Q-Network (DQN) to learn from an off-policy temporal difference (TD) learning model.
|
||
|
|
||
|
DQN was developed by [Google](https://github.com/google/deepmind-lab/tree/master/dqn).
|
||
|
|
||
|
You can install the DQN Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/google/deepmind-lab.git@master#egg=dqn
|
||
|
```
|
||
|
|
||
|
Read the [DQN tutorial](https://github.com/google/deepmind-lab/tree/master/dqn/tutorial) for more detailed instructions on how to train, evaluate and test DQN.
|
||
|
|
||
|
## PPO
|
||
|
|
||
|
PPO is a probabilistic policy optimization method used for off-policy learning.
|
||
|
|
||
|
PPO was developed by [Sergio Balduzzi](https://github.com/balduzzi).
|
||
|
|
||
|
You can install the PPO Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/balduzzi/ppo
|
||
|
```
|
||
|
|
||
|
Read the [PPO tutorial](https://github.com/balduzzi/ppo/blob/master/tutorial.md) for more detailed instructions on how to train, evaluate and test PPO.
|
||
|
|
||
|
## A2C
|
||
|
|
||
|
A2C is a method for off-policy learning, which learns when the policy is not available.
|
||
|
|
||
|
A2C was developed by [Kaufmann et al.](https://arxiv.org/abs/1602.01783).
|
||
|
|
||
|
You can install the A2C Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/openai/a2c-ppo-acktr
|
||
|
```
|
||
|
|
||
|
Read the [A2C tutorial](https://github.com/openai/a2c-ppo-acktr/blob/master/README.md) for more detailed instructions on how to train, evaluate and test A2C.
|
||
|
|
||
|
## Option 2
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [PyTorch](https://pytorch.org/docs/stable/index.html) library.
|
||
|
|
||
|
You can install the PyTorch Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install torch==1.4.0+cpu torchvision==0.4.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||
|
```
|
||
|
|
||
|
Read the [PyTorch tutorial](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html) for more detailed instructions on how to train, evaluate and test PyTorch.
|
||
|
|
||
|
## Option 3
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [TorchX](https://github.com/yuyangma/torchx) library.
|
||
|
|
||
|
You can install the TorchX Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/yuyangma/torchx
|
||
|
```
|
||
|
|
||
|
Read the [TorchX tutorial](https://github.com/yuyangma/torchx/blob/master/tutorial.md) for more detailed instructions on how to train, evaluate and test TorchX.
|
||
|
|
||
|
## Option 4
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-gpu](https://github.com/tensorflow/tensorflow) library.
|
||
|
|
||
|
You can install the Tensorflow-GPU Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install --upgrade https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist_softmax.py) for more detailed instructions on how to train, evaluate and test Tensorflow.
|
||
|
|
||
|
## Option 5
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [onnxruntime](https://github.com/microsoft/onnxruntime) library.
|
||
|
|
||
|
You can install the ONNX Runtime Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install onnxruntime
|
||
|
```
|
||
|
|
||
|
Read the [ONNX Runtime tutorial](https://github.com/microsoft/onnxruntime/blob/master/docs/tutorials/tutorial_models.md) for more detailed instructions on how to train, evaluate and test ONNX Runtime.
|
||
|
|
||
|
## Option 6
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [onnx](https://github.com/onnx/onnx) library.
|
||
|
|
||
|
You can install the ONNX Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install onnx
|
||
|
```
|
||
|
|
||
|
`onnxruntime` is a drop-in replacement for `onnx`.
|
||
|
|
||
|
Read the [ONNX tutorial](https://github.com/onnx/onnx/blob/master/docs/getting_started/python.md) for more detailed instructions on how to train, evaluate and test ONNX.
|
||
|
|
||
|
## Option 7
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [mxnet](https://github.com/apache/incubator-mxnet) library.
|
||
|
|
||
|
You can install the MXNet Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install mxnet-cu90mkl --ignore-installed
|
||
|
```
|
||
|
|
||
|
Read the [MXNet tutorial](https://github.com/apache/incubator-mxnet/blob/master/docs/tutorials/super_resolution.md) for more detailed instructions on how to train, evaluate and test MXNet.
|
||
|
|
||
|
## Option 8
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [pytorch-lightning](https://github.com/PyTorchLightning/pytorch-lightning) library.
|
||
|
|
||
|
You can install the PyTorch-Lightning Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/PyTorchLightning/pytorch-lightning.git
|
||
|
```
|
||
|
|
||
|
Read the [PyTorch-Lightning tutorial](https://github.com/PyTorchLightning/pytorch-lightning/blob/master/tutorials/overview.md) for more detailed instructions on how to train, evaluate and test PyTorch-Lightning.
|
||
|
|
||
|
## Option 9
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [keras](https://keras.io/) library.
|
||
|
|
||
|
You can install the Keras Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install keras
|
||
|
```
|
||
|
|
||
|
Read the [Keras tutorial](https://keras.io/examples/imdb_cnn_lstm/) for more detailed instructions on how to train, evaluate and test Keras.
|
||
|
|
||
|
## Option 10
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [pytorch-rl](https://github.com/cyoon1729/pytorch-rl) library.
|
||
|
|
||
|
You can install the PyTorch-RL Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/cyoon1729/pytorch-rl.git
|
||
|
```
|
||
|
|
||
|
Read the [PyTorch-RL tutorial](https://github.com/cyoon1729/pytorch-rl/blob/master/tutorial.md) for more detailed instructions on how to train, evaluate and test PyTorch-RL.
|
||
|
|
||
|
## Option 11
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [ptan](https://github.com/Shmuma/ptan) library.
|
||
|
|
||
|
You can install the PTAN Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/Shmuma/ptan.git
|
||
|
```
|
||
|
|
||
|
Read the [PTAN tutorial](https://github.com/Shmuma/ptan/blob/master/tutorials/tutorial01.ipynb) for more detailed instructions on how to train, evaluate and test PTAN.
|
||
|
|
||
|
## Option 12
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorforce](https://github.com/reinforceio/tensorforce) library.
|
||
|
|
||
|
You can install the TensorForce Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorforce
|
||
|
```
|
||
|
|
||
|
Read the [TensorForce tutorial](https://github.com/reinforceio/tensorforce/blob/master/examples/quickstart.py) for more detailed instructions on how to train, evaluate and test TensorForce.
|
||
|
|
||
|
## Option 13
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [torchbeast](https://github.com/deepmind/torchbeast) library.
|
||
|
|
||
|
You can install the TorchBeast Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install torchbeast
|
||
|
```
|
||
|
|
||
|
Read the [TorchBeast tutorial](https://github.com/deepmind/torchbeast/blob/master/docs/tutorials.md) for more detailed instructions on how to train, evaluate and test TorchBeast.
|
||
|
|
||
|
## Option 14
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [torch-rl](https://github.com/lcswillems/torch-rl) library.
|
||
|
|
||
|
You can install the Torch-RL Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/lcswillems/torch-rl.git
|
||
|
```
|
||
|
|
||
|
Read the [Torch-RL tutorial](https://github.com/lcswillems/torch-rl/blob/master/tutorial.md) for more detailed instructions on how to train, evaluate and test Torch-RL.
|
||
|
|
||
|
## Option 15
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorrt](https://github.com/NVIDIA/TensorRT) library.
|
||
|
|
||
|
You can install the TensorRT Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorrt
|
||
|
```
|
||
|
|
||
|
Read the [TensorRT tutorial](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/inference.html) for more detailed instructions on how to train, evaluate and test TensorRT.
|
||
|
|
||
|
## Option 16
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorpack](https://github.com/tensorpack/tensorpack) library.
|
||
|
|
||
|
You can install the TensorPack Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorpack
|
||
|
```
|
||
|
|
||
|
Read the [TensorPack tutorial](https://github.com/tensorpack/tensorpack/tree/master/examples) for more detailed instructions on how to train, evaluate and test TensorPack.
|
||
|
|
||
|
## Option 17
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [pytorch-playground](https://github.com/Tongpan/pytorch-playground) library.
|
||
|
|
||
|
You can install the PyTorch-Playground Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/Tongpan/pytorch-playground.git
|
||
|
```
|
||
|
|
||
|
Read the [PyTorch-Playground tutorial](https://github.com/Tongpan/pytorch-playground/blob/master/tutorial.md) for more detailed instructions on how to train, evaluate and test PyTorch-Playground.
|
||
|
|
||
|
## Option 18
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-tutorials](https://github.com/pkmital/tensorflow_tutorials) library.
|
||
|
|
||
|
You can install the Tensorflow-Tutorials Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/pkmital/tensorflow_tutorials.git
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Tutorials tutorial](https://github.com/pkmital/tensorflow_tutorials/blob/master/notebooks/01_beginner_tutorials/basic_graph.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Tutorials.
|
||
|
|
||
|
## Option 19
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [rl-baselines-zoo](https://github.com/araffin/rl-baselines-zoo) library.
|
||
|
|
||
|
You can install the RL-Baselines-Zoo Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install git+https://github.com/araffin/rl-baselines-zoo.git
|
||
|
```
|
||
|
|
||
|
Read the [RL-Baselines-Zoo tutorial](https://github.com/araffin/rl-baselines-zoo/blob/master/tutorials/01_getting_started.ipynb) for more detailed instructions on how to train, evaluate and test RL-Baselines-Zoo.
|
||
|
|
||
|
## Option 20
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [pytorch-qrnn](https://github.com/salesforce/pytorch-qrnn) library.
|
||
|
|
||
|
You can install the PyTorch-QRNN Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install pytorch-qrnn
|
||
|
```
|
||
|
|
||
|
Read the [PyTorch-QRNN tutorial](https://github.com/salesforce/pytorch-qrnn/blob/master/tutorial.md) for more detailed instructions on how to train, evaluate and test PyTorch-QRNN.
|
||
|
|
||
|
## Option 21
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-addons](https://github.com/tensorflow/addons) library.
|
||
|
|
||
|
You can install the Tensorflow-Addons Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-addons
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Addons tutorial](https://github.com/tensorflow/addons/blob/master/docs/tutorials/tensorflow_addons_basics.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Addons.
|
||
|
|
||
|
## Option 22
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-datasets](https://github.com/tensorflow/datasets) library.
|
||
|
|
||
|
You can install the Tensorflow-Datasets Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-datasets
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Datasets tutorial](https://github.com/tensorflow/datasets/tree/master/docs/tutorials) for more detailed instructions on how to train, evaluate and test Tensorflow-Datasets.
|
||
|
|
||
|
## Option 23
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-probability](https://github.com/tensorflow/probability) library.
|
||
|
|
||
|
You can install the Tensorflow-Probability Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-probability
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Probability tutorial](https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/jupyter_notebooks/Probabilistic_Layers_Regression.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Probability.
|
||
|
|
||
|
## Option 24
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-graphics](https://github.com/tensorflow/graphics) library.
|
||
|
|
||
|
You can install the Tensorflow-Graphics Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-graphics
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Graphics tutorial](https://github.com/tensorflow/graphics/blob/master/tensorflow_graphics/notebooks/basic.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Graphics.
|
||
|
|
||
|
## Option 25
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-hub](https://github.com/tensorflow/hub) library.
|
||
|
|
||
|
You can install the Tensorflow-Hub Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-hub
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Hub tutorial](https://github.com/tensorflow/hub/blob/master/examples/image_retraining/retrain.py) for more detailed instructions on how to train, evaluate and test Tensorflow-Hub.
|
||
|
|
||
|
## Option 26
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-io](https://github.com/tensorflow/io) library.
|
||
|
|
||
|
You can install the Tensorflow-IO Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-io
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-IO tutorial](https://github.com/tensorflow/io/blob/master/docs/tutorials/tensorflow_io.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-IO.
|
||
|
|
||
|
## Option 27
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-lattice](https://github.com/tensorflow/lattice) library.
|
||
|
|
||
|
You can install the Tensorflow-Lattice Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-lattice
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Lattice tutorial](https://github.com/tensorflow/lattice/blob/master/tensorflow_lattice/demo/california_housing_hparams_search.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Lattice.
|
||
|
|
||
|
## Option 28
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-text](https://github.com/tensorflow/text) library.
|
||
|
|
||
|
You can install the Tensorflow-Text Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-text
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Text tutorial](https://github.com/tensorflow/text/blob/master/tensorflow_text/python/examples/embedding_sentence_features.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Text.
|
||
|
|
||
|
## Option 29
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-transform](https://github.com/tensorflow/transform) library.
|
||
|
|
||
|
You can install the Tensorflow-Transform Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-transform
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Transform tutorial](https://github.com/tensorflow/transform/blob/master/docs/examples.md) for more detailed instructions on how to train, evaluate and test Tensorflow-Transform.
|
||
|
|
||
|
## Option 30
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-gan](https://github.com/tensorflow/gan) library.
|
||
|
|
||
|
You can install the Tensorflow-GAN Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-gan
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-GAN tutorial](https://github.com/tensorflow/gan/blob/master/docs/tutorials/infogan.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-GAN.
|
||
|
|
||
|
## Option 31
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-federated](https://github.com/tensorflow/federated) library.
|
||
|
|
||
|
You can install the Tensorflow-Federated Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-federated
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Federated tutorial](https://github.com/tensorflow/federated/blob/master/tensorflow_federated/python/research/simple_fedavg_cnn_mnist.py) for more detailed instructions on how to train, evaluate and test Tensorflow-Federated.
|
||
|
|
||
|
## Option 32
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-datasets](https://github.com/tensorflow/datasets) library.
|
||
|
|
||
|
You can install the Tensorflow-Datasets Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-datasets
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Datasets tutorial](https://github.com/tensorflow/datasets/blob/master/tutorial.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Datasets.
|
||
|
|
||
|
## Option 33
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-probability](https://github.com/tensorflow/probability) library.
|
||
|
|
||
|
You can install the Tensorflow-Probability Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-probability
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Probability tutorial](https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/jupyter_notebooks/Probabilistic_Layers_Regression.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Probability.
|
||
|
|
||
|
## Option 34
|
||
|
|
||
|
You can train, evaluate and test the above methods from Python using the [tensorflow-addons](https://github.com/tensorflow/addons) library.
|
||
|
|
||
|
You can install the Tensorflow-Addons Python package by executing the following command:
|
||
|
|
||
|
```bash
|
||
|
pip install tensorflow-addons
|
||
|
```
|
||
|
|
||
|
Read the [Tensorflow-Addons tutorial](https://github.com/tensorflow/addons/blob/master/docs/tutorials/tensorflow_addons_basics.ipynb) for more detailed instructions on how to train, evaluate and test Tensorflow-Addons.
|
||
|
|
||
|
![TacNet Logo](https://raw.githubusercontent.com/ddrscott/tacnet/master/logo/tacnet_logo_small.png)
|
||
|
|
||
|
## Usage
|
||
|
|
||
|
Install the requirements:
|
||
|
|
||
|
```
|
||
|
pip install -r requirements.txt
|
||
|
```
|
||
|
|
||
|
Play against TacNet:
|
||
|
|
||
|
```
|
||
|
python play.py
|
||
|
```
|
||
|
|
||
|
or
|
||
|
|
||
|
```
|
||
|
python play.py -p tacnet
|
||
|
```
|
||
|
|
||
|
Train TacNet:
|
||
|
|
||
|
```
|
||
|
python train.py
|
||
|
```
|
||
|
|
||
|
Play against a trained TacNet:
|
||
|
|
||
|
```
|
||
|
python play.py -p tacnet -w <weights>
|
||
|
```
|
||
|
|
||
|
For example:
|
||
|
|
||
|
```
|
||
|
python play.py -p tacnet -w weights/tacnet_2018-08-01_02-01-13/weights.hdf5
|
||
|
```
|
||
|
|
||
|
## Notes
|
||
|
|
||
|
TacNet is designed to be run on GNU/Linux. It may work on other platforms, but this has not been tested.
|
||
|
|
||
|
## License
|
||
|
|
||
|
TacNet is licensed under the GNU General Public License v3.0.
|
||
|
|
||
|
It is able to learn from a sequence of moves as efficiently as a player would learn from that sequence alone.
|
||
|
|
||
|
This AI is designed to play tic-tac-toe against humans and can be trained with a human player.
|
||
|
|
||
|
The board state and action space are encoded as:
|
||
|
num_squares = 9
|
||
|
board_structure = [0, 1, 2, 3, 4, 5, 6, 7, 8]
|
||
|
actions = {
|
||
|
"board_structure": [board_structure],
|
||
|
"num_squares": [num_squares],
|
||
|
"win_state": [board_structure],
|
||
|
"is_draw": [board_structure]
|
||
|
}
|
||
|
|
||
|
This code is licensed, but it can be adapted to, and trained on, your own dataset.
|
||
|
|
||
|
For more information see:
|
||
|
https://tacnet.readthedocs.io/en/latest/tacnet.html
|
||
|
https://github.com/nolte/tacnet
|
||
|
|
||
|
|
||
|
It can be trained on the classic and advanced tic-tac-toe game.
|
||
|
|
||
|
The board is represented by a sequence of 9 binary encoded features.
|
||
|
|
||
|
A feature 0 is added to the sequence at the end of a game, and a feature 1 is added at the beginning of a game.
|
||
|
|
||
|
The expected input is a sequence of length 9.
|
||
|
|
||
|
For training, the input is fed to the RNN and the output of the RNN is a linear layer.
|
||
|
|
||
|
To play the game, the output of the network is read to predict the next input.
|
||
|
|
||
|
The game ends as soon as there is a draw or a winner.
|
||
|
|
||
|
A winner is defined as the player who has 3 in a row.
|
||
|
|
||
|
TacNet is a `TensorFlow` model.
|
||
|
|
||
|
The implementation is based on the following paper:
|
||
|
|
||
|
@techreport{klepka2015deep,
|
||
|
title={Deep Tic-Tac-Toe Learner},
|
||
|
author={Klepka, Tadeusz and Pluczny, Maciej},
|
||
|
year={2015},
|
||
|
institution={Politechnika Warszawska}
|
||
|
}
|
||
|
|
||
|
@inproceedings{klepka2015playing,
|
||
|
title={Playing tic-tac-toe with a deep neural network},
|
||
|
author={Klepka, Tadeusz and Pluczny, Maciej},
|
||
|
year={2015},
|
||
|
booktitle={Proceedings of the International Conference on Machine Learning},
|
||
|
pages={21--32},
|
||
|
organization={ACM}
|
||
|
}
|
||
|
|
||
|
@inproceedings{klepka2015playing2,
|
||
|
title={Playing tic-tac-toe with a deep neural network},
|
||
|
author={Klepka, Tadeusz and Pluczny, Maciej},
|
||
|
year={2015},
|
||
|
booktitle={Proceedings of the International Conference on Machine Learning},
|
||
|
pages={33--44},
|
||
|
organization={ACM}
|
||
|
}
|
||
|
|
||
|
The current version of TacNet can play a single round against an opponent.
|
||
|
|
||
|
* A single round is a sequence of 3 turns by the two players.
|
||
|
* For a single turn, a player may play an action or pass.
|
||
|
* Each turn is defined by three values:
|
||
|
- the current player (an integer in the range [1, 2])
|
||
|
- the current state of the board (a list of 9 integers in the range [1, 3])
|
||
|
- the previous action of the opponent (an integer in the range [0, 3])
|
||
|
|
||
|
The board is a 3x3 matrix, with the cells numbered as follows:
|
||
|
|
||
|
1 2 3
|
||
|
4 5 6
|
||
|
7 8 9
|
||
|
|
||
|
The possible states are as follows.
|
||
|
|
||
|
1 : Empty cell
|
||
|
2 : X mark
|
||
|
3 : O mark
|
||
|
|
||
|
The parameters are as follows:
|
||
|
|
||
|
1. Learning rate (alpha)
|
||
|
2. Discount factor (gamma)
|
||
|
3. Neural network architecture (layers)
|
||
|
4. Optimizer (optimizer)
|
||
|
|
||
|
The current version of TacNet is published on GitHub: https://github.com/HaroSachan/TacNet
|
||
|
|
||
|
The algorithm can be found in a paper by Haro Sachan: https://arxiv.org/abs/1808.06993
|
||
|
]]
|
||
|
|
||
|
local m1 = {{0.0155074093490839, 0.006488165818154812, 0.00027295813197270036, -0.004079065751284361, 0.006884671747684479, 0.0042692008428275585, -0.009449069388210773, 0.0015732233878225088, -0.021253908053040504, -0.0015808097086846828, 0.02058781497180462, 0.014156852848827839, 0.0011407096171751618, 0.0026757230516523123, 0.10612194985151291, 0.0009431670187041163, -0.0068403566256165504, -2.948203109554015e-05, 0.014330151490867138, -0.010280955582857132, 0.00607257941737771, -0.007597192190587521, 0.010801678523421288, -0.4398154318332672, -0.011297371238470078, 0.0011513150529935956, 0.18024559319019318, -0.0026264109183102846, 0.2545485496520996, 0.005268874578177929, -0.22284995019435883, -0.0025158366188406944}, {-0.023265661671757698, -0.028199685737490654, 0.0019726157188415527, 0.002311018295586109, 0.0039128935895860195, -0.010445198975503445, -0.003814024617895484, 0.011082574725151062, 0.004536514636129141, -0.002593920798972249, 0.009368406608700752, 0.006720294710248709, 8.915900980355218e-05, 0.004164125304669142, 0.9897564053535461, 0.019276781007647514, -0.01060003973543644, -0.014678665436804295, -0.011010600253939629, -0.010455616749823093, -0.013942350633442402, 0.0043983678333461285, -0.002270421478897333, -2.5565435886383057, 0.005101943388581276, 0.005614021793007851, -0.07775966078042984, 0.004709700588136911, 1.195217490196228, -0.0025556227192282677, -0.011072738096117973, -0.003579859621822834}, {-0.0008956045494414866, 5.668430821970105e-05, -0.00031418423168361187, -0.01421699021011591, -0.008675345219671726, -0.0025554841849952936, 0.006331453565508127, -0.001803689869120717, -0.009321389719843864, -0.0074706594459712505, 0.01738801598548889, -0.008258081041276455, 0.013161792419850826, 0.01379439327865839, -0.041612766683101654, 0.010264900512993336, -0.008259480819106102, -0.005292787216603756, 0.013369467109441757, -0.02200317569077015, -0.0027856286615133286, -0.015384763479232788, 0.007832403294742107, 0.14879383146762848, 
0.0022518739569932222, -0.0015280295629054308, 0.08750196546316147, -0.010823709890246391, -0.01591183803975582, -0.00044037486077286303, 0.024126602336764336, -0.013408261351287365}, {0.0006285661365836859, -0.011370187625288963, -0.014266930520534515, -0.016016153618693352, -0.005611760076135397, 0.02668766863644123, 0.0008600738365203142, 0.008757492527365685, -0.007828972302377224, -0.014785801991820335, 0.004311458673328161, 0.0020335528533905745, -0.015151389874517918, -0.01958364248275757, 0.8698611259460449, 0.0009036201518028975, 0.008898426778614521, 0.00037994986632838845, -0.0009526663925498724, -0.0005932285566814244, -0.021463865414261818, 0.0022706245072185993, -0.0007221369305625558, -0.28683385252952576, -0.004136236384510994, -0.008284393697977066, -0.0014376738108694553, 0.010943891480565071, -0.6084831357002258, -0.007891763933002949, 0.0010044678347185254, 0.010963504202663898}, {0.008662886917591095, 0.0016641751863062382, -0.004643036983907223, -0.002748311497271061, -0.004758197348564863, -0.007227372378110886, 0.0021537886932492256, -0.009522989392280579, 0.024872008711099625, -0.003176902187988162, -0.012698564678430557, 0.013463514856994152, -0.0038242435548454523, -0.013188377022743225, 0.12998083233833313, -0.001050448277965188, -0.0090628731995821, -0.005227814894169569, 0.013407512567937374, 0.003268970875069499, 0.00863635540008545, -0.0067290812730789185, -0.017506981268525124, 0.27209416031837463, -0.004974703304469585, 0.014676785096526146, 0.005096870474517345, -0.0023669705260545015, -0.09735122323036194, 0.00852327048778534, 0.029738785699009895, -0.00018516465206630528}, {0.0013851794647052884, 0.0075482577085494995, 0.0033859265968203545, -0.007900326512753963, 0.011987993493676186, 0.005661900620907545, 0.0027204789221286774, 0.0013737846165895462, -0.0040061213076114655, -0.01702849753201008, -0.024045083671808243, -0.001812430564314127, -0.007875361479818821, 0.0038145040161907673, 0.6438373327255249, -0.01725444197654724, 
0.013205562718212605, -0.008944562636315823, 0.0036043482832610607, -0.013157244771718979, 0.00829
|
||
|
local m2 = {{0.07553824782371521, -0.0645296722650528, 0.10721559077501297, -0.03344632312655449, 0.04402335733175278, -0.14498655498027802, -0.24824872612953186, -0.028136080130934715, 0.010257996618747711, 0.04899561032652855, -0.07254855334758759, 0.04425831139087677, -0.12577120959758759, -0.06210765615105629, -0.2474345862865448, 0.19734469056129456, 0.08223488181829453, -0.2642064690589905, 0.20032833516597748, 0.1393246352672577, 0.24789603054523468, -0.029128318652510643, 0.03991561755537987, 0.08747734874486923, -0.15210981667041779, 0.21348346769809723, -0.004202730022370815, -0.07640890777111053, 0.035374537110328674, 0.20545758306980133, -0.13402153551578522, -0.013360240496695042, -0.06283942610025406, 0.004215680994093418, 0.052041616290807724, 0.17439034581184387, -0.04691479727625847, -0.10672907531261444, 0.06983888894319534, 0.06806627660989761, 0.09846071153879166, -0.015826642513275146, 0.0821738988161087, -0.09562093764543533, -0.026398606598377228, 0.1365816593170166, 0.10816899687051773, 0.053475674241781235, -0.03520507737994194, 0.05663060396909714, -0.0377238430082798, 0.051254600286483765, 0.15148167312145233, 0.02784220688045025, 0.12553270161151886, 0.10538196563720703, -0.03774934262037277, -0.030157925561070442, -0.018928879871964455, 0.21677447855472565, -0.15169253945350647, 0.03154894337058067, -0.07218336313962936, 0.20766714215278625, 0.06265395879745483, -0.11403381824493408, -0.040245164185762405, 0.09963522851467133, -0.06630754470825195, -0.0850624144077301, -0.006549973506480455, -0.023176562041044235, -0.24850808084011078, -0.10374903678894043, -0.027541888877749443, -0.043780986219644547, 0.16565898060798645, -0.1047585979104042, -0.1458377242088318, -0.21487028896808624, 0.2625219523906708, 0.10952800512313843, -0.080295130610466, -0.2025158852338791, -0.0033290924038738012, 0.010512113571166992, 0.06078812852501869, 0.044913001358509064, -0.010809013620018959, -0.05210274085402489, -0.08819323033094406, 
-0.012975985184311867, -0.12405148148536682, 0.16844694316387177, 0.01159491017460823, -0.0008553077350370586, -0.07364997267723083, 0.12349097430706024, 0.051770057529211044, -0.05794773995876312, -0.08141735196113586, -0.11702139675617218, 0.012413940392434597, 0.06812078505754471, 0.04283491522073746, 0.14569199085235596, -0.06163358315825462, -0.16245973110198975, 0.15673525631427765, 0.09435007721185684, -0.002995515475049615, -0.07004749029874802, -0.015164253301918507, 0.24665884673595428, 0.04964752867817879, -0.053110793232917786, -0.12439381331205368, 0.015544100664556026, -0.009431213140487671, -0.027016587555408478, -0.1101924255490303, 0.0286633912473917, 0.020475197583436966, -0.1841467022895813, 0.2083078920841217, 0.04791541025042534, -0.10505610704421997, 0.21289026737213135, 0.012760705314576626, 0.013979974202811718, -0.028596483170986176, -0.09427878260612488, 0.2807009220123291, 0.15835528075695038, 0.03745485097169876, 0.00626502325758338, -0.11637209355831146, 0.026969268918037415, 0.18618597090244293, -0.24378374218940735, 0.31604015827178955, 0.1815677434206009, -0.03278563544154167, 0.11942233145236969, 0.18154408037662506, 0.09858276695013046, -0.20427648723125458, -0.1102781742811203, -0.08439134061336517, 0.14521808922290802, 0.06375597417354584, 0.0015419838018715382, -0.11102668941020966, 0.004924634005874395, -0.10762850195169449, -0.03029109723865986, 0.1219550222158432, 0.043739549815654755, -0.056693390011787415, -0.1550617665052414, 0.09157174080610275, -0.0006331920740194619, -0.14751747250556946, 0.125230610370636, 0.0348643995821476, -0.005419035442173481, 0.12913982570171356, 0.020559782162308693, -0.010819525457918644, 0.00659601017832756, -0.031850166618824005, 0.19830839335918427, -0.1730603277683258, 0.04745963588356972, 0.38217002153396606, -0.12155912071466446, -0.12539425492286682, 0.005400398280471563, -0.30477866530418396, 0.1125781312584877, 0.030136732384562492, 0.22387877106666565, 0.17369307577610016, 
0.010449954308569431, 0.1646101176738739, -0.05209542065858841, -0.1663273274898529, -0.13515152037143707, 0.0367365442216396
|
||
|
local m3 = {{0.009516230784356594, 0.0008425090927630663, -0.017556974664330482, 0.02476123720407486, -0.034104764461517334, -0.011140045709908009, 0.003947657998651266, -0.03618647903203964, -0.01101678516715765, 0.00030229284311644733, 0.014600289985537529, -0.002630482194945216, 0.007838805206120014, -0.0005172564415261149, -0.0037844639737159014, -0.006547580938786268, 0.01933031529188156, -0.018469903618097305, 0.019318249076604843, -0.00700779166072607, 0.001528410823084414, -0.020212141796946526, -0.011535385623574257, -0.01706167683005333, 0.0065964930690824986, 0.0052375649102032185, -0.018697207793593407, -0.006334342062473297, -0.009692884981632233, -0.022722972556948662, 0.02197665348649025, -0.0021998595912009478, 0.006585733965039253, -0.014808631502091885, -0.017702076584100723, 0.0002919542312156409, 0.00327065191231668, -0.030447693541646004, -0.009339185431599617, -0.026839585974812508, 0.017142461612820625, 0.01338908076286316, 0.025845259428024292, 0.005769837647676468, -0.012223068624734879, -0.007683220785111189, 0.014349495992064476, 0.0004278783162590116, 0.015890449285507202, -0.006705868989229202, -0.003823523875325918, -0.014069996774196625, 0.011142758652567863, -0.018648669123649597, 0.016940785571932793, -0.025992073118686676, 0.01406420860439539, 0.013590030372142792, -0.002768441569060087, -0.009542546235024929, 0.022065678611397743, -0.02242337539792061, -0.016950998455286026, -0.010218249633908272, -0.01981775090098381, 0.027655651792883873, 0.018984152004122734, -0.016047650948166847, -0.016686448827385902, -0.009950535371899605, 0.024117877706885338, -0.014002557843923569, 0.004535323474556208, -0.00038597718230448663, -0.027086971327662468, 0.007953743450343609, 0.002249007346108556, 0.04770814999938011, 0.001457726233638823, 0.012946750037372112, 0.006412704475224018, -0.025540916249155998, -0.011068882420659065, 0.028563037514686584, -0.003233348485082388, 0.025422489270567894, 0.0053801205940544605, -0.021807070821523666, 
0.02243175357580185, -0.006487073376774788, 0.001496594282798469, -0.004277585074305534, -0.006896098610013723, -0.019910087808966637, -0.0025084929075092077, 0.012772764079272747, -0.03876601532101631, -0.003019219497218728, -0.009426457807421684, -0.031368497759103775, 0.004159184172749519, -0.006035245954990387, -0.0029123704880476, -0.006636423524469137, -0.0035145285073667765, 0.008156322874128819, -0.0005139620625413954, -0.016334783285856247, 0.0019581655506044626, -0.0024109994992613792, -0.01843879371881485, -0.022009186446666718, 0.009026749059557915, -0.011627603322267532, -0.011687044985592365, -0.004435592330992222, -0.009052526205778122, 0.017921777442097664, 0.0008525729645043612, 0.022320745512843132, -0.027022631838917732, -0.0200740285217762, -0.003349972190335393, 0.03742954507470131, 0.0010988248977810144, 0.007118892390280962, 0.00021665613166987896, 0.010995729826390743, -0.007925386540591717, 0.015884919092059135, -0.017087919637560844, -0.0005352856242097914, 0.015408642590045929, 0.00386289251036942, -0.004619075451046228, 0.003382954513654113, -0.0530519001185894, -0.018984798341989517, -0.006341319531202316, 0.010616196319460869, 0.016283299773931503, 0.020095815882086754, -0.00814361684024334, 0.016138175502419472, -0.008159846998751163, 0.009877338074147701, -0.0068206884898245335, 0.002201693132519722, -0.001729658804833889, 0.006905004382133484, -0.009472863748669624, 0.025870058685541153, 0.0066243726760149, 0.004255575593560934, -0.023563643917441368, -0.0028897391166538, -0.014729859307408333, -0.024066759273409843, 0.00457024946808815, -0.0041436399333179, 0.0031475743744522333, 0.014590512961149216, -0.008705848827958107, 0.006075338926166296, -0.015089615248143673, 0.030635613948106766, -0.0043508997187018394, 0.0025448654778301716, 0.011285186745226383, 0.007540541235357523, -0.015763312578201294, -0.010544901713728905, 0.008412657305598259, -0.013742371462285519, -0.018784286454319954, 0.018428392708301544, 
0.0014808729756623507, -0.016916777938604355, 0.018245549872517586, -0.011576131917536259, -0.010058755986392498, 0.016
|
||
|
local m4 = {-0.12119200825691223, -0.4320903420448303, 0.16360077261924744, -0.024315910413861275, 0.318928062915802, 0.28024089336395264, -0.11023423820734024, 0.1976889669895172, 0.5035359859466553, -0.12433239072561264, 0.4639778137207031, 0.4027350842952728, 0.12333646416664124, -0.25493040680885315, -0.02231675200164318, 0.1948857307434082, -0.4754638671875, 0.16065751016139984, -0.23279736936092377, 0.1479167342185974, 0.009400470182299614, 0.47699084877967834, 0.08560270071029663, 0.292190283536911, -0.3599409759044647, 0.058879803866147995, 0.11652722209692001, 0.017723476514220238, 0.31027185916900635, 0.42517009377479553, -0.16898401081562042, 0.0004393960989546031, 0.3184901475906372, -0.3918144702911377, 0.6982641816139221, 0.19490303099155426, -0.043992869555950165, 0.5211102962493896, 0.27624088525772095, 0.18817909061908722, 0.0014870389131829143, 0.23174692690372467, -0.3168533742427826, -0.41062405705451965, -0.11928929388523102, 0.36713600158691406, -0.22957241535186768, 0.03572351485490799, -0.42685163021087646, -0.005996388848870993, 0.0681905448436737, -0.18357662856578827, -0.4264618456363678, -0.10152142494916916, 0.08709192276000977, 0.5503253936767578, -0.18714933097362518, -0.395602285861969, 0.44956478476524353, -0.015746744349598885, -0.17238160967826843, 0.1827181577682495, 0.4743066430091858, 0.6574476957321167, 0.38722914457321167, -0.16221420466899872, -0.22226335108280182, 0.566750705242157, 0.31580430269241333, -0.029436055570840836, -0.09957416355609894, -0.05943959951400757, -0.4640403985977173, -0.41787704825401306, -0.019030319526791573, -0.2625054717063904, -0.03521683067083359, -0.5692240595817566, 0.030449818819761276, -0.8333264589309692, -0.0997619777917862, 0.33173683285713196, 0.023881589993834496, -0.4496639668941498, 0.1895037442445755, 0.03452831134200096, -0.4834495484828949, 0.26014867424964905, -0.4918281137943268, 0.21658116579055786, -0.019057705998420715, -0.29834115505218506, -0.04869748279452324, 
0.17696478962898254, -0.15104009211063385, -0.7326403856277466, 0.2771260142326355, -0.11435204744338989, 0.7235895395278931, 0.31815558671951294, -0.49658554792404175, 0.170261412858963, -0.35118937492370605, -0.0026072559412568808, 0.3665490448474884, -0.16517239809036255, 0.2335260808467865, 0.16591136157512665, 0.12471108138561249, -0.21304485201835632, 0.7565355896949768, 0.5610060691833496, 0.33956265449523926, 0.36495935916900635, 0.05180160328745842, -0.6422286629676819, 0.24114453792572021, -0.38803631067276, -0.029255546629428864, -0.08472876995801926, 0.4624996781349182, 0.8540431261062622, -0.19887351989746094, -0.24511770904064178, -0.09459339827299118, -0.07290452718734741, 0.069530189037323, -0.3269287943840027, 0.29990383982658386, -0.2062879204750061, 0.04589494317770004, 0.40371009707450867, 0.2186051905155182, -0.00915728323161602, 0.944208562374115, 0.02223058231174946, 0.3416841924190521, -0.1464000791311264, -0.36764127016067505, -0.24689121544361115, -0.15031121671199799, -0.4667811989784241, -0.3748229444026947, 0.050587594509124756, -0.212965190410614, -0.24506749212741852, -0.3570350408554077, -0.09480604529380798, -0.08399725705385208, -0.07389301061630249, 0.2477569729089737, -0.4943859875202179, 0.21521520614624023, -0.14873568713665009, 0.24749773740768433, 0.15940918028354645, -0.4732681214809418, 1.1500868797302246, -0.05571204796433449, 0.11321435868740082, 0.05533626675605774, -0.10508105903863907, 0.1509464979171753, -0.1316273957490921, -0.1409706324338913, -0.6401874423027039, 0.07622697204351425, -0.02066471241414547, 0.026243390515446663, -0.3827364444732666, -0.056412018835544586, 0.5131473541259766, -0.1578075885772705, 0.02683178335428238, 0.3737300634384155, 0.269226998090744, 0.05207091569900513, 0.0038901555817574263, -0.059790484607219696, 0.24472108483314514, 0.02900165133178234, 0.38514867424964905, 0.059309568256139755, -0.06608220934867859, 0.7094714045524597, 0.30065619945526123, 0.2561238706111908, 
-0.6245588064193726, 0.12549398839473724, 0.16537179052829742, -0.17407894134521484, 0.29258841276168823, -0.2057512253522873
|
||
|
-- Bias vector added to dot(m3, hidden) before the softmax in run_step.
-- It has 32 entries, matching the 32 characters of the `chars` alphabet.
local m5 = {-0.18638470768928528, -0.1896393597126007, -0.1889890730381012, -0.19049835205078125, -0.18795514106750488, -0.18933545053005219, -0.19069159030914307, -0.18700024485588074, -0.18877221643924713, -0.19134320318698883, -0.19095942378044128, -0.18640364706516266, -0.1892036348581314, -0.18770746886730194, 1.2487244606018066, -0.18736907839775085, -0.1877896934747696, -0.18898238241672516, -0.1888350248336792, -0.1865197867155075, -0.18802186846733093, -0.18859650194644928, -0.19018925726413727, 1.5589599609375, -0.18677210807800293, -0.18892286717891693, -0.11103861778974533, -0.18899531662464142, 1.566094994544983, -0.18721932172775269, 0.8411797285079956, -0.18822908401489258}
|
||
|
|
||
|
--- Apply `f` element-wise across one or more parallel arrays.
-- For every index i of the first array, calls f(a1[i], a2[i], ...) and stores
-- the first value it returns at out[i].
-- @param f    function taking one argument per input array
-- @param ...  one or more array-like tables; the first one fixes the length
-- @return     a new array holding the per-index results
local function zipmap(f, ...)
    -- Lua 5.1 / LuaJIT expose unpack as a global instead of table.unpack.
    local unpack = table.unpack or unpack
    local arrays = {...}
    local narrays = select("#", ...)
    local first = arrays[1]
    local out = {}
    -- Walk indices in ascending order: pairs() gives no ordering guarantee,
    -- and the output position must match the input position.
    for i = 1, #first do
        local args = {}
        for k = 1, narrays do
            args[k] = arrays[k][i]
        end
        -- The explicit count keeps nil arguments in their slot; the extra
        -- parentheses keep only the first result so that a multi-valued `f`
        -- cannot be misread as a (position, value) pair.
        out[i] = (f(unpack(args, 1, narrays)))
    end
    return out
end
|
||
|
|
||
|
--- Add up the numbers stored in an array.
-- Indices 1..#arr are visited in ascending order so that the floating-point
-- result does not depend on table traversal order.
-- @param arr  array of numbers
-- @return     the total; 0 for an empty array
local function sum(arr)
    local total = 0
    for i = 1, #arr do
        total = total + arr[i]
    end
    return total
end
|
||
|
--- Element-wise sum of any number of equally long arrays.
-- @param ...  arrays of numbers
-- @return     array whose i-th entry is the sum of the i-th entries of the inputs
local function pairsum(...)
    local function add_all(...)
        return sum({...})
    end
    return zipmap(add_all, ...)
end
|
||
|
|
||
|
--- Multiply a matrix (array of row arrays) by a vector.
-- Rows and columns are visited by ascending index so that the accumulation
-- order is fixed rather than left to pairs().
-- @param mat  array of rows, each an array of numbers
-- @param vec  array of numbers, at least as long as each row
-- @return     array with one entry per row: the row's inner product with vec
local function dot(mat, vec)
    local out = {}
    for i = 1, #mat do
        local row = mat[i]
        local acc = 0
        for j = 1, #row do
            acc = acc + row[j] * vec[j]
        end
        out[i] = acc
    end
    return out
end
|
||
|
|
||
|
--- Build an array holding `n` zeros.
-- @param n  number of entries
-- @return   a fresh array {0, 0, ..., 0} of length n
local function zeroes(n)
    local filled = {}
    for slot = 1, n do
        filled[slot] = 0
    end
    return filled
end
|
||
|
|
||
|
-- Alphabet of the character-level model; the position of a character in this
-- string is its (1-based) index in one-hot vectors and output distributions.
local chars = "abcdefghijklmnopqrstuvwxyz-|. \n_"
-- index -> character
local i_to_c_map = {}
-- character -> index
local c_to_i_map = {}
do
    local position = 0
    -- "." matches every character, including the newline in the alphabet.
    for symbol in chars:gmatch(".") do
        position = position + 1
        i_to_c_map[position] = symbol
        c_to_i_map[symbol] = position
    end
end
|
||
|
|
||
|
--- One-hot encode a single character of the model alphabet.
-- @param char  one-character string that occurs in `chars`
-- @return      array of #chars numbers, all 0 except a 1 at the character's index
-- Raises an error if `char` is not part of the alphabet.
local function onehot(char)
    local index = c_to_i_map[char]
    if index == nil then
        -- Without this check the assignment below fails with the opaque
        -- "index is nil" message; report the offending character instead.
        error(("onehot: character %q is not in the model alphabet"):format(tostring(char)), 2)
    end
    local vec = zeroes(#chars)
    vec[index] = 1
    return vec
end
|
||
|
|
||
|
--- Numerically stable softmax.
-- Subtracting the largest input before exponentiating leaves the result
-- mathematically unchanged but keeps math.exp from overflowing to inf (which
-- would turn the whole output into NaN) when the inputs are large.
-- @param v  array of numbers (logits)
-- @return   array of the same length with non-negative entries summing to 1;
--           an empty array for empty input
local function softmax(v)
    local n = #v
    if n == 0 then
        return {}
    end
    local peak = -math.huge
    for i = 1, n do
        if v[i] > peak then
            peak = v[i]
        end
    end
    local out = {}
    local total = 0
    for i = 1, n do
        local e = math.exp(v[i] - peak)
        out[i] = e
        total = total + e
    end
    for i = 1, n do
        out[i] = out[i] / total
    end
    return out
end
|
||
|
|
||
|
--- Advance the recurrent network by one character.
-- hidden' = tanh(m1 * onehot(char) + m2 * prev + m4)
-- output  = softmax(m3 * hidden' + m5)
-- NOTE(review): math.tanh is deprecated in Lua 5.3 and absent from stock 5.4;
-- confirm the target runtime provides it.
-- @param char  single character from the model alphabet
-- @param prev  hidden-state vector from the previous step
-- @return      the softmax output over the alphabet, then the new hidden state
local function run_step(char, prev)
    local input_term = dot(m1, onehot(char))
    local recurrent_term = dot(m2, prev)
    local new_hidden = zipmap(math.tanh, pairsum(input_term, recurrent_term, m4))
    local logits = pairsum(dot(m3, new_hidden), m5)
    return softmax(logits), new_hidden
end
|
||
|
|
||
|
--- Draw one option at random, with probability proportional to its weight.
-- Weights are accumulated by ascending index (not via pairs(), whose order is
-- unspecified), so option i is chosen exactly when the random threshold falls
-- inside the i-th cumulative interval.
-- @param opts     array of options, parallel to `weights`
-- @param weights  array of non-negative numbers
-- @return         the chosen option, or nil when no cumulative weight exceeds
--                 the threshold (e.g. empty or all-zero weights)
local function do_sample(opts, weights)
    local count = #weights
    local total = 0
    for i = 1, count do
        total = total + weights[i]
    end
    local value = math.random() * total
    -- Re-accumulate instead of storing a cumulative table: same partial sums,
    -- no temporary allocation.
    local running = 0
    for i = 1, count do
        running = running + weights[i]
        if running > value then
            return opts[i]
        end
    end
    return nil
end
|
||
|
|
||
|
--- Prime the network with `input`, then generate `n` characters.
-- Every character of `input` except the last is fed through run_step only to
-- build up the hidden state. The last character starts generation: each step
-- samples a character from the output distribution and feeds it back in.
-- @param input  prompt string made of alphabet characters
-- @param n      number of characters to generate
-- @return       the generated characters joined into one string
local function run_seq(input, n)
    local probs
    local state = zeroes(#m4)
    local generated = {}
    local last = #input
    for pos = 1, last - 1 do
        probs, state = run_step(input:sub(pos, pos), state)
    end
    local current = input:sub(last, last)
    for _ = 1, n do
        probs, state = run_step(current, state)
        local picked = do_sample(i_to_c_map, probs)
        generated[#generated + 1] = picked
        -- Read back from the buffer so the next input is always its last entry.
        current = generated[#generated]
    end
    return table.concat(generated)
end
|
||
|
|
||
|
--- Parse a textual board into a flat array of cell characters.
-- A row is a newline-terminated run of "x", "o" and "." characters; rows are
-- read in order and their cells appended left to right. Text that does not
-- form such a row is skipped.
-- Lua patterns escape with "%", not "\": the previous set "[xo\\.]" also
-- accepted a literal backslash as a cell. Inside a set "." is already literal.
-- @param s  board text, rows separated by "\n"
-- @return   array of one-character strings ("x", "o" or ".")
local function read_board(s)
    local cells = {}
    for row in (s .. "\n"):gmatch("([xo.]+)\n") do
        for cell in row:gmatch(".") do
            cells[#cells + 1] = cell
        end
    end
    return cells
end
|
||
|
|
||
|
--- Choose and play a move for "x" on the given board.
-- The board text is sent to the network followed by "\n-\n", and 11 generated
-- characters are parsed as the network's proposed board. The first empty
-- square that the proposal marks "x" is played; if there is none, a random
-- empty square is played instead.
-- @param s  board text as accepted by read_board
-- @return   the board after the move, as one string of cells without row
--           separators; returned unchanged when no square is empty
local function entry(s)
    local board = read_board(s)
    local output = read_board(run_seq(s .. "\n-\n", 11))
    local choices = {}
    for i = 1, #board do
        if board[i] == "." then
            if output[i] == "x" then
                board[i] = "x"
                return table.concat(board)
            end
            choices[#choices + 1] = i
        end
    end
    -- math.random(1, 0) raises "interval is empty", so only pick a fallback
    -- square when at least one is free.
    if #choices > 0 then
        board[choices[math.random(1, #choices)]] = "x"
    end
    return table.concat(board)
end
|
||
|
|
||
|
-- Demo / debug dump: run a single network step for the character "e" from an
-- all-zero hidden state and print the resulting distribution, one line per
-- alphabet entry, as "<zero-based index>|<value>".
local char = "e"
local prev = zeroes(#m4)
local new_hidden, output_pre_bias
do
    local pre_activation = pairsum(dot(m1, onehot(char)), dot(m2, prev), m4)
    new_hidden = zipmap(math.tanh, pre_activation)
    -- Despite its name, this value already includes the m5 bias and the softmax.
    output_pre_bias = softmax(pairsum(dot(m3, new_hidden), m5))
end
for slot = 1, #output_pre_bias do
    print((slot - 1) .. "|" .. output_pre_bias[slot])
end
|