/* same as on main page, but translated to java */ // CHANGES: // * best = child with max number of visits (instead of max winrate) // * UCTK outside of sqrt(...) in uct formula // * randomresult non-global
class Node { public int wins=0; public int visits=0;
public int x, y; // position of move
//public Node parent; //optional public Node child; public Node sibling;
public Node(/*Node parent, */int x, int y) { this.x=x; this.y=y; }
public void update(int val) { visits++; wins+=val; }
public double getWinRate() { if (visits>0) return (double)wins / visits; else return 0; /* should not happen */; }
}
class Board {
// child with highest number of visits is used (not: best winrate) public Node getBestChild(Node root) { Node child = root.child; Node best_child = null; int best_visits= -1; while (child!=null) { // for all children if (child.visits>best_visits) { best_child=child; best_visits=child.visits; } child = child.sibling; } return best_child; }
public Node root=null;
public static final double UCTK = 0.44; // 0.44 = sqrt(1/5) // Larger values give uniform search // Smaller values give very selective search
public Node UCTSelect(Node node) { Node res=null; Node next = node.child; double best_uct=0;
while (next!=null) { // for all children double uctvalue; if (next.visits > 0) { double winrate=next.getWinRate(); double uct = UCTK * Math.sqrt( Math.log(node.visits) / next.visits ); uctvalue = winrate + uct; } else { // Always play a random unexplored move first uctvalue = 10000 + 1000*Math.random(); }
if (uctvalue > best_uct) { // get max uctvalue of all children best_uct = uctvalue; res = next; }
next = next.sibling; } return res; }
// return 0=lose 1=win for current player to move int playSimulation(Node n) { int randomresult=0; if (n.child==null && n.visits<10) { // 10 simulations until chilren are expanded (saves memory) randomresult = playRandomGame(); } else { if (n.child == null) createChildren(n);
Node next = UCTSelect(n); // select a move if (next==null) { /* ERROR */ } makeMove(next.x, next.y);
int res=playSimulation(next); randomresult = 1-res; }
n.update(1-randomresult); //update node (Node-wins are associated with moves in the Nodes) return randomresult; }
// generate a move, using the uct algorithm Move UCTSearch(int numsim) { root=new Node(-1,-1); //init uct tree createChildren(root);
Board clone=new Board(); for (int i=0; i<numsim; i++) { clone.copyStateFrom(this); clone.playSimulation(root); }
Node n=getBestChild(root); return new Move(n.x, n.y); }
// NOT IMPLEMENTED YET:
int BOARD_SIZE=19; int[][] f = new int[BOARD_SIZE][BOARD_SIZE]; // the board int cur_player=1; //player to make next move (1 or 2)
void makeMove(int x, int y) { f[x][y]=cur_player; cur_player=3-cur_player; }
public void makeRandomMove() { int x=0; int y=0; while (true) { x=rand.nextInt(BOARD_SIZE); y=rand.nextInt(BOARD_SIZE); if (f[x][y]==0 && isOnBoard(x,y)) break; } makeMove(x,y); }
// return 0=lose 1=win for current player to move int playRandomGame() { int cur_player1=cur_player; while (!isGameOver()) { makeRandomMove(); } return getWinner()==curplayer1 ? 1 : 0; }
// expand children in Node void createChildren(Node parent) { Node last=parent; for (int i=0; i<BOARD_SIZE; i++) for (int j=0; j<BOARD_SIZE; j++) if (isOnBoard(i, j) && f[i][j]==0) { Node node=new Node(i, j); if (last==parent) last.child=node; else last.sibling=node; last=node; } }
void copyStateFrom(Board b) {
}
} /* END: class Board */
Shouldn't the updateWin() function take into account whether it is black or white move? Currently it looks like a win for either is good for both, and that can't be right. Unless I'm missing something else.
I would say that it is playRandomGame() which should take into account whether it is black or white move this information is contained in randomresult.
Might the "clone.makeMove(next.move);" in the play simulation function slow it down by making the same move multiple times is it goes down through that node because it was good to search out lower nodes?
barabanus: Are you sure that 10 simulations until children are expanded is good for the algorithm? I've tried this and it appeared to slow down search.
Francois: I've found I get very similar results with that setting at 1 or 10. How much is your search slowed down by?
barabanus: I use the uct algorithm to search combinations (not for go game) and I get the following results with that variable: 0 -> 2.3 sec, 5 -> 9.8 sec, 10 -> 18.9 sec So I get the best results earlier when I don't use simulations until children are expanded. I expand them at once instead.
Francois: A value of 0 could indicate that you expand the whole search space before doing any playouts, which is not the UCT algo (if its possible to expand the entire search space, Minimax will probably be better), so I'll assume that's not what you meant. What takes 2.3s, 9.8s and 18.9s to complete? A fixed number of playouts? That doesn't seem correct. Can I ask what you are applying the UCT algo to?
barabanus: There's a huge search space of permutations. It's not possible to store it in memory. Some of permutations terminates early, but I search for the longest ones. Each node keeps max depth of the search tree that was possible to achieve from that node. I calculate winrate as (max_depth / expected_max_depth). I've tried different values for the random-playouts-before-expanding variable to find that it slows down the search. With immediate expanding it takes 2.3s to find the max depth, with 5 -> 9.8s, 10 -> 18.9s. I think that's because I need to deep the tree as much as possible, but this variable slows down the deepening. May be I miss something with uct? I'm new to it, I've read some papers, but may be I misuse it?
Francois: I'm still not sure what you're trying to search for. Do you have a search space and you're trying to find the longest possible path in it? Then UCT is not the way to do it. If you have a search space and you want to find the path where you are most likely to get the a favourable outcome, then UCT will work.
barabanus: Yes, I'm trying to find the longest possible path in the search space. Looks like you're right, the uct algorithm isn't the best choice for my task, but I couldn't know it before I tried. I think your last phrase should be written in the uct algorithm description, because it does explain uct designation! especially "most likely to get the favourable outcome" part. why your code is not work??