add simulation and backpropagation, some cleanup

2023-03-29 18:25:39 +02:00 · 2023-03-29 18:25:39 +02:00 · def372fcc6
commit def372fcc6
parent 91e6af8998
2 changed files with 37 additions and 17 deletions
--- a/TP2/includes/mybt.h
+++ b/TP2/includes/mybt.h
@ -103,8 +103,8 @@ struct bt_t {
  // MCTS
  bt_node_t *mcts_selection(bt_node_t *);
  void mcts_expansion(bt_node_t *);
-  void mcts_simulation();
-  void mcts_back_propagation();
+  bool mcts_simulation(bt_node_t *); // plays the node's move, then random moves, on a board copy; returns the playout result
+  void mcts_back_propagation(bt_node_t *, bool); // follows parent links from the node, incrementing nb_simulation (and wins when the flag is true)

  // déclarées mais non définies
  double eval();
--- a/TP2/src/mybt.cpp
+++ b/TP2/src/mybt.cpp
@ -195,7 +195,7 @@ bt_move_t bt_t::get_rand_move() {

 bt_node_t *bt_t::mcts_selection(bt_node_t *node) {
  if (node->nb_simulation) {
-    bt_node_t *the_chosen = NULL;
+    bt_node_t *the_chosen = nullptr;
    float score = 0.;
    for (auto i : node->children) {
      // https://en.wikipedia.org/wiki/Monte_Carlo_tree_search#Exploration_and_exploitation
@ -231,22 +231,43 @@ void bt_t::mcts_expansion(bt_node_t *root) {
  }
 }

-void bt_t::mcts_simulation(void) {}
+// Simulation step of MCTS: run one random playout for `node`.
+//
+// Works on a copy of the current board, so the real game state is never
+// modified. The copy first plays node->move, then keeps playing random moves
+// until endgame() reports a result.
+//
+// @param node  tree node whose move starts the playout
+// @return true when the colour reported by endgame() is the colour that was
+//         to move on this board when the playout started, false otherwise
+bool bt_t::mcts_simulation(bt_node_t *node) {
+  bt_t b_copy = *this;

-void bt_t::mcts_back_propagation(void) {}
+  // Colour to move before the playout starts (even turn -> WHITE).
+  const int me = (b_copy.turn % 2 == 0) ? WHITE : BLACK;
+
+  // try to play my move
+  b_copy.play(node->move);
+
+  // then play randomly 'til the game is over
+  // NOTE(review): assumes endgame() returns EMPTY while the game is still
+  // running and the winner's colour once it is over - confirm in endgame().
+  int winner = b_copy.endgame();
+  while (winner == EMPTY) {
+    b_copy.play(b_copy.get_rand_move());
+    winner = b_copy.endgame();
+  }
+
+  // if i won
+  // (compare explicitly: `a ? WHITE : BLACK == me` would parse as
+  // `a ? WHITE : (BLACK == me)` and not test the winner at all)
+  return winner == me;
+}
+
+// Back-propagation step of MCTS: report one playout result up the tree.
+//
+// Starting at `simulated`, follows the parent links and, for every node on
+// the way (root included), increments nb_simulation and, when `won` is true,
+// wins. A null `simulated` is a no-op.
+//
+// @param simulated  node the playout was run for
+// @param won        result returned by mcts_simulation for that node
+void bt_t::mcts_back_propagation(bt_node_t *simulated, bool won) {
+  // propagate values to the top of the tree; the loop only stops once we have
+  // stepped past the root, so the root's counters are updated too
+  // (mcts_selection tests nb_simulation on the root it is given).
+  while (simulated != nullptr) {
+    if (won) {
+      simulated->wins++;
+    }
+    simulated->nb_simulation++;
+    simulated = simulated->parent;
+  }
+}

 bt_move_t bt_t::get_mcts_move(double max_time) {
+  // Chooses a move with Monte Carlo tree search.
+  //
+  // Builds a fresh tree, then repeats selection -> expansion -> simulation ->
+  // back-propagation until `max_time` seconds have elapsed (measured with
+  // std::chrono::steady_clock), and finally returns the move of the root
+  // child that received the most simulations.
+  //
+  // NOTE(review): the tree allocated below is never released in this
+  // function; node ownership is not visible here, so this is only flagged.
+
  // Init tree
  bt_node_t *tree = new bt_node_t();
-  tree->parent = NULL;
+  tree->parent = nullptr;
  tree->children = {};
  tree->wins = 0;
  tree->nb_simulation = 0;
  mcts_expansion(tree);

-  // Copy board
-  bt_t b_copy = *this;
-
  // Time constraint
  auto now = std::chrono::steady_clock::now();
  std::chrono::duration<double> elapsed{};
@ -254,31 +275,30 @@ bt_move_t bt_t::get_mcts_move(double max_time) {
  // MCTS
  while (elapsed.count() < max_time) {
    // Selection
-    auto selected = b_copy.mcts_selection(tree);
+    auto selected = mcts_selection(tree);

    // Expansion
-    b_copy.mcts_expansion(selected);
+    mcts_expansion(selected);

    // Simulation
-    // bool is_win = mcts_simulation(selected);
+    bool is_win = mcts_simulation(selected);

    // Update
-    // mcts_back_propagation(selected, is_win);
+    mcts_back_propagation(selected, is_win);

-    // Time constraint
+    // Update elapsed time
    elapsed = std::chrono::steady_clock::now() - now;
  }

  // Select best move
-  /* bt_node_t *best_node = NULL;
+  bt_node_t *best_node = nullptr;
  int best_score = -1;
  for (auto i : tree->children) {
    if (i->nb_simulation > best_score) {
      best_node = i;
+      // remember the running maximum, otherwise every child beats -1 and the
+      // last child always wins regardless of its simulation count
+      best_score = i->nb_simulation;
    }
  }
-  return best_node->move; */
-  return moves[(static_cast<int>(rand())) % nb_moves]; // TMP
+  // The root has no children: nothing to pick from, so fall back to a random
+  // move instead of dereferencing a null pointer.
+  if (best_node == nullptr) {
+    return get_rand_move();
+  }
+  return best_node->move;
 }

 bool bt_t::can_play(bt_move_t _m) {