game: coop_to_1p(game=tiny_hanabi())

GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC
GameType.dynamics = Dynamics.SEQUENTIAL
GameType.information = Information.IMPERFECT_INFORMATION
GameType.long_name = "1p(Tiny Hanabi)"
GameType.max_num_players = 1
GameType.min_num_players = 1
GameType.parameter_specification = ["game"]
GameType.provides_information_state_string = False
GameType.provides_information_state_tensor = False
GameType.provides_observation_string = True
GameType.provides_observation_tensor = True
GameType.provides_factored_observation_string = False
GameType.reward_model = RewardModel.TERMINAL
GameType.short_name = "coop_to_1p"
GameType.utility = Utility.GENERAL_SUM

NumDistinctActions() = 3
PolicyTensorShape() = [3]
MaxChanceOutcomes() = 2
GetParameters() = {game=tiny_hanabi()}
NumPlayers() = 1
MinUtility() = 0.0
MaxUtility() = 10.0
UtilitySum() = None
ObservationTensorShape() = [15]
ObservationTensorLayout() = TensorLayout.CHW
ObservationTensorSize() = 15
MaxGameLength() = 4
ToString() = "coop_to_1p(game=tiny_hanabi())"

# State 0
#
IsTerminal() = False
History() = []
HistoryString() = ""
IsChanceNode() = True
IsSimultaneousNode() = False
CurrentPlayer() = -1
ObservationString(0) = "Player 0\nNew Game\n"
ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
ChanceOutcomes() = [{0, 0.500000000000}, {1, 0.500000000000}]
LegalActions() = [0, 1]
StringLegalActions() = ["d0", "d1"]

# Apply action "d0"
action: 0

# State 1
# p0:d0
# Player 0 possible: d0 d1
IsTerminal() = False
History() = [0]
HistoryString() = "0"
IsChanceNode() = True
IsSimultaneousNode() = False
CurrentPlayer() = -1
ObservationString(0) = "Player 0\nNew Game\nPlayer 0 possible: d0 d1\n"
ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
ChanceOutcomes() = [{0, 0.500000000000}, {1, 0.500000000000}]
LegalActions() = [0, 1]
StringLegalActions() = ["d0", "d1"]

# Apply action "d1"
action: 1

# State 2
# p0:d0 p1:d1
# Player 0 p0a0: none
# Player 0 p0a1: none
# Player 0 p0a2: none
# Player 0 unassigned: d0 d1
# Player 1 possible: d0 d1
IsTerminal() = False
History() = [0, 1]
HistoryString() = "0, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = "Player 0\nNew Game\nPlayer 0 p0a0: none\nPlayer 0 p0a1: none\nPlayer 0 p0a2: none\nPlayer 0 unassigned: d0 d1\nPlayer 1 possible: d0 d1\n"
ObservationTensor(0): ◯◯◯◉◉◉◉◯◯◯◯◯◯◉◯
Rewards() = [0.0]
Returns() = [0.0]
LegalActions() = [0, 1, 2]
StringLegalActions() = ["d0->p0a0", "d0->p0a1", "d0->p0a2"]

# Apply action "d0->p0a2"
action: 2

# State 3
# p0:d0 p1:d1
# Player 0 p0a0: none
# Player 0 p0a1: none
# Player 0 p0a2: d0
# Player 0 unassigned: d1
# Player 1 possible: d0 d1
IsTerminal() = False
History() = [0, 1, 2]
HistoryString() = "0, 1, 2"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = "Player 0\nNew Game\nPlayer 0 p0a0: none\nPlayer 0 p0a1: none\nPlayer 0 p0a2: d0\nPlayer 0 unassigned: d1\nPlayer 1 possible: d0 d1\n"
ObservationTensor(0): ◯◯◯◉◉◉◉◯◯◯◯◉◯◯◉
Rewards() = [0.0]
Returns() = [0.0]
LegalActions() = [0, 1, 2]
StringLegalActions() = ["d1->p0a0", "d1->p0a1", "d1->p0a2"]

# Apply action "d1->p0a1"
action: 1

# State 4
# p0:d0 p1:d1 p0:a2
# Player 0 possible: d0
# Player 1 p1a0: none
# Player 1 p1a1: none
# Player 1 p1a2: none
# Player 1 unassigned: d0 d1
IsTerminal() = False
History() = [0, 1, 2, 1]
HistoryString() = "0, 1, 2, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = "Player 0\np0a2\nPlayer 0 possible: d0\nPlayer 1 p1a0: none\nPlayer 1 p1a1: none\nPlayer 1 p1a2: none\nPlayer 1 unassigned: d0 d1\n"
ObservationTensor(0): ◯◯◉◉◯◉◉◯◯◯◯◯◯◉◯
Rewards() = [0.0]
Returns() = [0.0]
LegalActions() = [0, 1, 2]
StringLegalActions() = ["d0->p1a0", "d0->p1a1", "d0->p1a2"]

# Apply action "d0->p1a2"
action: 2

# State 5
# p0:d0 p1:d1 p0:a2
# Player 0 possible: d0
# Player 1 p1a0: none
# Player 1 p1a1: none
# Player 1 p1a2: d0
# Player 1 unassigned: d1
IsTerminal() = False
History() = [0, 1, 2, 1, 2]
HistoryString() = "0, 1, 2, 1, 2"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = "Player 0\np0a2\nPlayer 0 possible: d0\nPlayer 1 p1a0: none\nPlayer 1 p1a1: none\nPlayer 1 p1a2: d0\nPlayer 1 unassigned: d1\n"
ObservationTensor(0): ◯◯◉◉◯◉◉◯◯◯◯◉◯◯◉
Rewards() = [0.0]
Returns() = [0.0]
LegalActions() = [0, 1, 2]
StringLegalActions() = ["d1->p1a0", "d1->p1a1", "d1->p1a2"]

# Apply action "d1->p1a2"
action: 2

# State 6
# p0:d0 p1:d1 p0:a2 p1:a2
# Player 0 possible: d0
# Player 1 possible: d0 d1
IsTerminal() = True
History() = [0, 1, 2, 1, 2, 2]
HistoryString() = "0, 1, 2, 1, 2, 2"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = -4
ObservationString(0) = "Player 0\np1a2\nPlayer 0 possible: d0\nPlayer 1 possible: d0 d1\n"
ObservationTensor(0): ◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯
Rewards() = [10.0]
Returns() = [10.0]
