* rework of selection

* don't override nbmove to 0 * fix typo that prevented the simulation from playing * fix win verification * fix backpropagation * use an iteration count instead of a time limit to bound the algorithm (MEMORY LEAK?) * remove debug prints
2023-03-31 16:39:27 +02:00 · 2023-03-31 16:39:27 +02:00 · 343a6e8cf0
commit 343a6e8cf0
parent b5c53228fd
2 changed files with 46 additions and 54 deletions
--- a/TP2/includes/mybt.h
+++ b/TP2/includes/mybt.h
@ -1,7 +1,6 @@
 #ifndef MYBT_H
 #define MYBT_H
 #include <chrono>
 #include <random>
 #define WHITE 0
@ -103,8 +102,8 @@ struct bt_t {
  // MCTS
  bt_node_t *mcts_selection(bt_node_t *);
  void mcts_expansion(bt_node_t *);
-  bool mcts_simulation(bt_node_t *);
+  bool mcts_simulation();
-  void mcts_back_propagation(bt_node_t *, bool);
+  static void mcts_back_propagation(bt_node_t *, bool);
  void add_move(int _li, int _ci, int _lf, int _cf) {
    moves[nb_moves].line_i = _li;
--- a/TP2/src/mybt.cpp
+++ b/TP2/src/mybt.cpp
@ -193,29 +193,39 @@ bt_move_t bt_t::get_rand_move() {
 }
 bt_node_t *bt_t::mcts_selection(bt_node_t *node) {
-  if (node->nb_simulation > 0) {
+  // Play the move if the game is over or on the first call
-    bt_node_t *best = nullptr;
+  if ((endgame() != EMPTY) ||
-    float score = std::numeric_limits<float>::lowest();
+      ((node->nb_simulation == 0) && (node->parent != nullptr))) {
-    for (auto i : node->children) {
+    play(node->move);
-      // https://en.wikipedia.org/wiki/Monte_Carlo_tree_search#Exploration_and_exploitation
+    return node;
      float curr_score =
          static_cast<float>(i->wins) / i->nb_simulation +
          sqrt(2) * sqrt(log(node->nb_simulation) / i->nb_simulation);
      if (curr_score > score) {
        best = i;
        score = curr_score;
      }
    }
    // Return the element with the highest score
    return best;
  } else {
    // Return random element if no simulation already done
    return node
        ->children[(static_cast<int>(rand())) % (node->children.size() - 1)];
  }
  bt_node_t *best = nullptr;
  float score = std::numeric_limits<float>::lowest();
  for (auto i : node->children) {
    // Play the move if never played
    if (i->nb_simulation == 0) {
      play(i->move);
      return i;
    }
    // https://en.wikipedia.org/wiki/Monte_Carlo_tree_search#Exploration_and_exploitation
    float curr_score =
        (static_cast<float>(i->wins) / i->nb_simulation) +
        ((sqrt(2) * sqrt(log(node->nb_simulation)) / i->nb_simulation));
    if (curr_score > score) {
      best = i;
      score = curr_score;
    }
  }
  play(best->move);
  return mcts_selection(best);
 }
-void bt_t::mcts_expansion(bt_node_t *root) {
+void bt_t::mcts_expansion(bt_node_t *node) {
  update_moves();
  for (int i = 0; i < nb_moves; i++) {
@ -223,32 +233,29 @@ void bt_t::mcts_expansion(bt_node_t *root) {
    bt_node_t *tmp = new bt_node_t;
    tmp->move = moves[i];
    tmp->wins = 0;
-    tmp->parent = root;
+    tmp->parent = node;
    tmp->nb_simulation = 0;
-    // Add child to root
+    // Add child to selected node
-    root->children.push_back(tmp);
+    node->children.push_back(tmp);
  }
  // Avoid re-add same moves
  nb_moves = 0;
 }
-bool bt_t::mcts_simulation(bt_node_t *node) {
+bool bt_t::mcts_simulation(void) {
  int me = (turn % 2 == 0) ? WHITE : BLACK;
  // then play randomly 'til the game is over
-  while (endgame() != EMPTY) {
+  while (endgame() == EMPTY) {
    play(get_rand_move());
  }
  // if i won
-  return ((turn % 2 == 0) ? WHITE : BLACK == me) ? true : false;
+  return ((turn % 2 == 0) ? WHITE : BLACK) == me ? true : false;
 }
 void bt_t::mcts_back_propagation(bt_node_t *simulated, bool won) {
  // propagate values to the top of the tree
-  while (simulated->parent != nullptr) {
+  while (simulated != nullptr) {
    if (won) {
      simulated->wins++;
    }
@ -276,40 +283,26 @@ bt_move_t bt_t::get_mcts_move(double max_time) {
  tree->children = {};
  tree->wins = 0;
  tree->nb_simulation = 0;
  print_vec(*this, tree->children);
  mcts_expansion(tree);
  print_vec(*this, tree->children);
-  // Time constraint
+  // Populate the tree with the initial entries
-  auto now = std::chrono::steady_clock::now();
+  mcts_expansion(tree);
  std::chrono::duration<double> elapsed{};
  // MCTS
-  while (elapsed.count() < max_time) {
+  for (int it = 0; it < 200; ++it) {
    // Copy board
    bt_t copy_b = *this;
    // Selection
-    bt_node_t *selected = copy_b.mcts_selection(tree);
+    bt_node_t *result = copy_b.mcts_selection(tree);
    // Play the move
    if (can_play(selected->move)) {
      play(selected->move);
    } else {
      fprintf(stderr, "?????\n");
    }
    // Expansion
-    copy_b.mcts_expansion(selected);
+    copy_b.mcts_expansion(result);
    // Simulation
-    bool is_win = copy_b.mcts_simulation(selected);
+    bool is_win = copy_b.mcts_simulation();
    // Update
-    mcts_back_propagation(selected, is_win);
+    mcts_back_propagation(result, is_win);
    // Update elapsed time
    elapsed = std::chrono::steady_clock::now() - now;
  }
  // Select best move