UCT/ Discussion

Sub-page of UCT
 /* same as on main page, but translated to java */
 // CHANGED: randomresult non-global
 /**
  * One node of the UCT search tree.
  *
  * The children of a node form a singly linked list: {@code child} is the
  * first child and every child links to the next one through {@code sibling}.
  */
 class Node {
    /** Sum of the simulation results credited to this node. */
    public int wins=0;
    /** Number of simulations that passed through this node. */
    public int visits=0;
    /** Move stored in this node by the constructor. */
    public Move move=null;
    /** Child chosen by the most recent {@link #setBest()} call, or null. */
    public Node bestNode=null;
    /** First child, or null while the node has no children. */
    public Node child=null;
    /** Next node in the parent's child list, or null for the last one. */
    public Node sibling=null;

    /**
     * Creates a node for the given move.
     *
     * @param m move to store in {@link #move}
     */
    Node(Move m) {
        // The field is called "move"; the original assigned to a non-existent "m".
        this.move=m;
    }

    /**
     * Scans all children and stores the one with the highest win rate
     * (wins / visits) in {@link #bestNode}. Children without any visit have
     * no defined win rate and are never chosen; if no child qualifies,
     * {@code bestNode} becomes null.
     */
    public void setBest() {
        Node best = null;
        double best_winrate = -1;
        for (Node next = child; next != null; next = next.sibling) {
            if (next.visits <= 0) {
                // 0/0 would yield NaN, which never compares greater; skip explicitly.
                continue;
            }
            double winrate = ((double) next.wins) / next.visits; // 0..1
            if (winrate > best_winrate) {
                best = next;
                best_winrate = winrate;
            }
        }
        bestNode = best;
    }

    /**
     * Returns the win rate of this node.
     *
     * @return wins / visits, or 0.5 when the node has not been visited yet
     */
    double getWinRate() {
        if (visits > 0) {
            return (double) wins / visits;
        }
        return 0.5; /* should not happen */
    }
 }
 /**
  * Game board together with a UCT search driver.
  *
  * The search tree is rooted at {@code root}; simulations are played on the
  * scratch board {@code clone}, which is reset from this board before every
  * simulation. {@code playRandomGame()} is still a placeholder.
  */
 class Board { /* BEGIN CLASS */
    /**
     * Exploration weight used in {@link #UCTSelect(Node)}.
     * Larger values give uniform search,
     * smaller values give very selective search.
     */
    double UCTK = 1;

    /** Scratch board the simulations are played on; created lazily by UCTSearch. */
    Board clone;

    /** Root of the search tree (was referenced but never declared). */
    Node root = new Node(null);

    /** Random source for makeRandomMove (was referenced but never declared). */
    java.util.Random rand = new java.util.Random();

    /**
     * Picks the child of {@code node} with the highest UCT value.
     * Unvisited children get a large random value so that each of them is
     * tried once, in random order, before any visited child is revisited.
     *
     * @param node parent whose children are examined
     * @return the selected child, or null if {@code node} has no children
     */
    public Node UCTSelect(Node node) {
        Node res = null;
        // Start below every possible score so that a child scoring exactly 0
        // (or less) is still selectable.
        double best_uct = Double.NEGATIVE_INFINITY;
        for (Node next = node.child; next != null; next = next.sibling) {
            double uctvalue;
            if (next.visits > 0) {
                double winrate = next.getWinRate();
                double uct = UCTK * Math.sqrt(Math.log(node.visits) / (5.0 * next.visits));
                uctvalue = winrate + uct;
            } else {
                // Always play a random unexplored move first
                uctvalue = 10000 + 1000 * Math.random();
            }
            if (uctvalue > best_uct) { // get max uctvalue of all children
                best_uct = uctvalue;
                res = next;
            }
        }
        return res;
    }

    /**
     * Runs one simulation through the tree starting at {@code n} and updates
     * the visit/win counters of every node on the path.
     *
     * @param n node to start from
     * @return 0 or 1 (=randomresult), the value that was added to {@code n.wins}
     * @throws IllegalStateException if an already visited node has no child to select
     */
    int playSimulation(Node n) {
        int randomresult;
        if (n.visits == 0) { // node exists, but no evaluation done yet (for this node)
            randomresult = 1 - clone.playRandomGame();
        } else {
            if (n.child == null) {
                // The moves leading to n were played on clone, so the children
                // must be generated from clone's position, not from this board.
                clone.createChildren(n);
            }
            Node next = UCTSelect(n); // select a move
            if (next == null) {
                // The original only marked this as an error and then dereferenced next.
                throw new IllegalStateException("UCT: no child available to select");
            }
            // NOTE(review): only makeMove(int x, int y) is declared in this class;
            // this call needs a makeMove(Move) overload or the move's coordinates.
            clone.makeMove(next.move);
            randomresult = 1 - playSimulation(next);
        }
        n.visits++;
        n.wins += randomresult;
        if (n.child != null) {
            n.setBest();
        }
        return randomresult; // the original fell off the end without returning
    }

    /**
     * Runs {@code numsim} simulations from the current position.
     *
     * @param numsim number of simulations to play
     * @return the move of the root's best child, or null when no child has
     *         been evaluated yet (e.g. fewer than two simulations)
     */
    Move UCTSearch(int numsim) {
        if (clone == null) {
            // Cannot be a field initializer: every Board would create another Board.
            clone = new Board();
        }
        for (int i = 0; i < numsim; i++) {
            clone.copyStateFrom(this);
            playSimulation(root);
        }
        Node best = root.bestNode;
        return (best == null) ? null : best.move;
    }

// PARTLY IMPLEMENTED (playRandomGame is still a placeholder):

    /** Edge length of the board array. */
    int size_MAX=19;
    /** Board contents: 0 = empty, otherwise the number (1 or 2) of the player who moved there. */
    int[][] f = new int[size_MAX][size_MAX];
    /** Player to move; alternates between 1 and 2. */
    int game_turn_player=1;

    /**
     * Tells whether (x, y) lies inside the board array.
     * NOTE(review): added because the method was called but never defined;
     * it treats the whole size_MAX x size_MAX grid as playable.
     */
    boolean isOnBoard(int x, int y) {
        return x >= 0 && x < size_MAX && y >= 0 && y < size_MAX;
    }

    /**
     * Marks (x, y) for the player to move and passes the turn to the other player.
     */
    void makeMove(int x, int y) {
        f[x][y] = game_turn_player;
        game_turn_player = 3 - game_turn_player; // 1 <-> 2
    }

    /**
     * Plays a uniformly random move on an empty point.
     * Does nothing when no empty point is left (the original looped forever
     * in that case).
     */
    public void makeRandomMove() {
        int candidates = 0;
        for (int i = 0; i < size_MAX; i++) {
            for (int j = 0; j < size_MAX; j++) {
                if (isOnBoard(i, j) && f[i][j] == 0) {
                    candidates++;
                }
            }
        }
        if (candidates == 0) {
            return;
        }
        int pick = rand.nextInt(candidates); // index of the empty point to play
        for (int i = 0; i < size_MAX; i++) {
            for (int j = 0; j < size_MAX; j++) {
                if (isOnBoard(i, j) && f[i][j] == 0) {
                    if (pick == 0) {
                        makeMove(i, j);
                        return;
                    }
                    pick--;
                }
            }
        }
    }

    /**
     * Placeholder for a random playout.
     *
     * @return currently always 0; intended to return 0 or 1
     */
    int playRandomGame() {
        return 0; // or 1
    }

    /**
     * Appends one child to {@code parent} for every empty point of this board.
     * The first child is stored in {@code parent.child}; the others are chained
     * through {@code sibling}.
     *
     * @param parent node that receives the children
     * @return number of children created
     */
    int createChildren(Node parent) {
        int cnt = 0;
        Node last = parent;
        for (int i = 0; i < size_MAX; i++) {
            for (int j = 0; j < size_MAX; j++) {
                if (isOnBoard(i, j) && f[i][j] == 0) {
                    // NOTE(review): Node only declares Node(Move); this call needs a
                    // Move built from (i, j) or a matching Node(int, int) constructor.
                    Node node = new Node(i, j);
                    if (cnt == 0) {
                        last.child = node;
                    } else {
                        last.sibling = node;
                    }
                    last = node;
                    cnt++;
                }
            }
        }
        return cnt;
    }

    /**
     * Copies the board contents and the player to move from {@code b}.
     * Assumes both boards use the same size_MAX.
     *
     * @param b board to copy from
     */
    void copyStateFrom(Board b) {
        for (int i = 0; i < size_MAX; i++) {
            System.arraycopy(b.f[i], 0, f[i], 0, size_MAX);
        }
        game_turn_player = b.game_turn_player;
    }
 } /* END CLASS */

Shouldn't the updateWin() function take into account whether it is Black's or White's move? Currently it looks like a win for either side is counted as good for both, and that can't be right. Unless I'm missing something else.

I would say that it is playRandomGame() which should take into account whether it is Black's or White's move; this information is contained in randomresult.

Might the "clone.makeMove(next.move);" call in the playSimulation function slow it down by making the same move multiple times, as the search goes down through that node repeatedly because it was good and its lower nodes are being explored?


This is a copy of the living page "UCT/ Discussion" at Sensei's Library.
(OC) 2008 the Authors, published under the OpenContent License V1.0.
[Welcome to Sensei's Library!]
StartingPoints
ReferenceSection
About