Created
August 27, 2023 21:28
-
-
Save masouduut94/6254555dac7f04736affd939d24abb34 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt, log | |
from copy import deepcopy | |
from random import choice, random | |
from time import clock | |
from gamestate import GameState | |
from uct_mcstsagent import Node, UctMctsAgent | |
from meta import * | |
class RaveNode(Node):
    """Tree node whose value mixes a UCT estimate with a RAVE (AMAF) estimate."""

    def __init__(self, move=None, parent=None):
        """
        Initialize a new node with an optional move and parent.

        All setup is delegated to the base ``Node`` initializer; nothing else
        is assigned here.
        """
        super().__init__(move, parent)

    @property
    def value(self, explore: float = MCTSMeta.EXPLORATION, rave_const: float = MCTSMeta.RAVE_CONST) -> float:
        """
        Return the RAVE-weighted value of this node relative to its parent.

        The result is ``(1 - alpha) * UCT + alpha * AMAF`` where

        * ``UCT  = Q / N + explore * sqrt(2 * log(parent.N) / N)``
        * ``AMAF = Q_RAVE / N_RAVE`` (or 0 when ``N_RAVE`` is zero)
        * ``alpha = max(0, (rave_const - N) / rave_const)``, so the AMAF term
          fades out linearly as ``N`` approaches ``rave_const``.

        "explore" controls how strongly the value favors nodes that have yet
        to be thoroughly explored versus nodes that seem to have a high win
        rate.

        NOTE: this is a property, so plain attribute access (``node.value``)
        always evaluates with the default ``explore`` and ``rave_const``.

        Returns:
            ``GameMeta.INF`` for an unvisited node (or 0 when ``explore`` is
            zero), otherwise the blended UCT/AMAF value.
        """
        # Unless explore is zero, maximally favor unexplored nodes.
        if self.N == 0:
            # Compare by value: `is 0` depends on int caching and fails for 0.0.
            return 0 if explore == 0 else GameMeta.INF

        # Weight of the AMAF estimate; clamped at 0 once N exceeds rave_const.
        alpha = max(0, (rave_const - self.N) / rave_const)
        uct = self.Q / self.N + explore * sqrt(2 * log(self.parent.N) / self.N)
        # Guard the division by value so a float 0.0 counter is handled too.
        # Q_RAVE / N_RAVE are presumably maintained by the base Node/agent -- TODO confirm.
        amaf = self.Q_RAVE / self.N_RAVE if self.N_RAVE != 0 else 0
        return (1 - alpha) * uct + alpha * amaf
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment