UCT/ Discussion

Sub-page of UCT
 /* same as on main page, but translated to java */
 // CHANGES:
 // * best = child with max number of visits (instead of max winrate)
 // * UCTK outside of sqrt(...) in uct formula
 // * randomresult non-global
 /**
  * One node of the UCT search tree. Each node stands for a single move at
  * ({@link #x}, {@link #y}) and keeps the statistics gathered for that move.
  *
  * <p>Children are stored as a singly linked list: {@link #child} points to the
  * first child and each child points to the next one through {@link #sibling}.
  */
 class Node {
    /** Sum of all values passed to {@link #update(int)}. */
    public int wins=0;
    /** Number of times {@link #update(int)} has been called. */
    public int visits=0;
    public int x, y; // position of move
    //public Node parent; //optional
    /** First child of this node, or {@code null} while the node has no children. */
    public Node child;
    /** Next node in the parent's child list, or {@code null} for the last child. */
    public Node sibling;

    /**
     * Creates a node with no statistics and no children.
     *
     * @param x first coordinate of the move this node represents
     * @param y second coordinate of the move this node represents
     */
    public Node(/*Node parent, */int x, int y) {
      this.x=x;
      this.y=y;
    }

    /**
     * Records the outcome of one simulation that passed through this node.
     *
     * @param val amount added to {@link #wins}; the visit count always grows by one
     */
    public void update(int val) {
      visits++;
      wins+=val;
    }

    /**
     * Returns the fraction of recorded visits that counted as wins.
     *
     * @return {@code wins / visits}, or {@code 0} for a node that was never visited
     */
    public double getWinRate() {
        if (visits>0) {
            return (double)wins / visits;
        }
        // Should not happen: callers are expected to check visits first.
        return 0;
    }
 }
 /**
  * Board state together with a UCT tree search that picks the next move.
  *
  * <p>NOTE(review): {@code Move}, {@code isOnBoard(int, int)},
  * {@code isGameOver()} and {@code getWinner()} are used below but are not
  * defined in this listing; they have to be supplied before the class compiles.
  */
 class Board {
    /**
     * Weight of the exploration term in the UCT formula.
     * Larger values give uniform search, smaller values give very selective search.
     */
    public static final double UCTK = 0.44; // 0.44 = sqrt(1/5)

    /** Number of simulations a childless node receives before its children are created (saves memory). */
    private static final int EXPAND_THRESHOLD = 10;

    /** Root of the search tree built by the most recent {@link #UCTSearch(int)} call. */
    public Node root=null;

    /** Random source used for the playout moves. */
    private final java.util.Random rand = new java.util.Random();

    /**
     * Returns the child that was visited most often (not the one with the best win rate).
     *
     * @param root node whose children are examined
     * @return the most visited child; the first one wins ties; {@code null} if there are no children
     */
    public Node getBestChild(Node root) {
        Node best_child = null;
        int  best_visits= -1;
        for (Node child = root.child; child != null; child = child.sibling) {
            if (child.visits>best_visits) {
                best_child=child;
                best_visits=child.visits;
            }
        }
        return best_child;
    }

    /**
     * Selects the child of {@code node} with the highest UCT value.
     * Children that were never visited get a large random value, so a random
     * unexplored move is always played first.
     *
     * @param node parent whose children are compared
     * @return the selected child, or {@code null} if {@code node} has no children
     */
    public Node UCTSelect(Node node) {
        Node res=null;
        // Start below every possible value: a child whose UCT value is exactly 0
        // (win rate 0 and log(1) == 0) must still be selectable.
        double best_uct=Double.NEGATIVE_INFINITY;
        for (Node next = node.child; next != null; next = next.sibling) {
            double uctvalue;
            if (next.visits > 0) {
                double winrate=next.getWinRate();
                double uct = UCTK * Math.sqrt( Math.log(node.visits) / next.visits );
                uctvalue = winrate + uct;
            }
            else {
                // Always play a random unexplored move first
                uctvalue = 10000 + 1000*Math.random();
            }
            if (uctvalue > best_uct) { // get max uctvalue of all children
                best_uct = uctvalue;
                res = next;
            }
        }
        return res;
    }

    /**
     * Runs one simulation starting at {@code n} on this board and updates the
     * statistics of every node on the path.
     *
     * @param n node to simulate from
     * @return 0=lose 1=win for current player to move
     * @throws IllegalStateException if {@code n} has to be descended but no child can be selected
     */
    int playSimulation(Node n) {
        int randomresult=0;
        if (n.child==null && n.visits<EXPAND_THRESHOLD) { // 10 simulations until children are expanded (saves memory)
            randomresult = playRandomGame();
        }
        else {
            if (n.child == null)
                createChildren(n);
            Node next = UCTSelect(n); // select a move
            if (next==null) {
                throw new IllegalStateException("UCT selection found no child to descend into");
            }
            makeMove(next.x, next.y);
            int res=playSimulation(next);
            // The child's result is from the opponent's point of view.
            randomresult = 1-res;
        }
        n.update(1-randomresult); //update node (Node-wins are associated with moves in the Nodes)
        return randomresult;
    }

    /**
     * Generates a move using the UCT algorithm.
     *
     * @param numsim number of simulations to run from the root
     * @return the move of the most visited root child
     * @throws IllegalStateException if the root has no child, i.e. there is no candidate move
     */
    Move UCTSearch(int numsim) {
        root=new Node(-1,-1); //init uct tree
        createChildren(root);
        Board clone=new Board();
        for (int i=0; i<numsim; i++) {
            // Every simulation starts again from the current position.
            clone.copyStateFrom(this);
            clone.playSimulation(root);
        }
        Node n=getBestChild(root);
        if (n==null) {
            throw new IllegalStateException("no candidate move available");
        }
        return new Move(n.x, n.y);
    }

// NOT IMPLEMENTED YET: Move, isOnBoard, isGameOver, getWinner

    int BOARD_SIZE=19;
    int[][] f = new int[BOARD_SIZE][BOARD_SIZE]; // the board
    int cur_player=1; //player to make next move (1 or 2)

    /**
     * Places a stone of the current player at ({@code x}, {@code y}) and passes
     * the turn to the other player. No legality check is performed.
     */
    void makeMove(int x, int y) {
      f[x][y]=cur_player;
      cur_player=3-cur_player;
    }

    /**
     * Plays a move on a randomly chosen empty point.
     * Keeps drawing points until one qualifies, so it does not return if none does.
     */
    public void makeRandomMove() {
      int x;
      int y;
      do {
        x=rand.nextInt(BOARD_SIZE);
        y=rand.nextInt(BOARD_SIZE);
        // Bounds test first, so the array is only read for points on the board.
      } while (!(isOnBoard(x,y) && f[x][y]==0));
      makeMove(x,y);
    }

    /**
     * Plays random moves until the game is over.
     *
     * @return 0=lose 1=win for the player who was to move when the playout started
     */
    int playRandomGame() {
      int cur_player1=cur_player;
      while (!isGameOver()) {
        makeRandomMove();
      }
      return getWinner()==cur_player1 ? 1 : 0;
    }

    /**
     * Expands {@code parent}: appends one child node for every empty point on
     * the board, linked together through the sibling references.
     */
    void createChildren(Node parent) {
      Node last=parent;
      for (int i=0; i<BOARD_SIZE; i++)
        for (int j=0; j<BOARD_SIZE; j++)
          if (isOnBoard(i, j) && f[i][j]==0) {
            Node node=new Node(i, j);
            if (last==parent) last.child=node;
                         else last.sibling=node;
            last=node;
          }
    }

    /**
     * Overwrites the size, the stones and the player to move of this board with
     * those of {@code b}. The search tree ({@link #root}) is not copied.
     *
     * @param b board to copy from
     */
    void copyStateFrom(Board b) {
      BOARD_SIZE=b.BOARD_SIZE;
      if (f.length!=b.f.length) f=new int[b.f.length][];
      for (int i=0; i<b.f.length; i++) {
        int[] src=b.f[i];
        // Reuse the existing row where possible; this runs once per simulation.
        if (f[i]==null || f[i].length!=src.length) f[i]=new int[src.length];
        System.arraycopy(src, 0, f[i], 0, src.length);
      }
      cur_player=b.cur_player;
    }
 } /* END: class Board */

Shouldn't the updateWin() function take into account whether it is Black's or White's move? Currently it looks as if a win for either side counts as good for both, and that can't be right, unless I'm missing something else.

I would say that it is playRandomGame() which should take into account whether it is Black's or White's move; this information is contained in randomresult.

Might the "clone.makeMove(next.move);" in the play simulation function slow it down by making the same move multiple times as it goes down through that node, because it was good to search out lower nodes?


This is a copy of the living page "UCT/ Discussion" at Sensei's Library.
(OC) 2009 the Authors, published under the OpenContent License V1.0.
[Welcome to Sensei's Library!]
StartingPoints
ReferenceSection
About