game: tiny_hanabi

GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC
GameType.dynamics = Dynamics.SEQUENTIAL
GameType.information = Information.IMPERFECT_INFORMATION
GameType.long_name = "Tiny Hanabi"
GameType.max_num_players = 10
GameType.min_num_players = 2
GameType.parameter_specification = ["num_actions", "num_chance", "num_players", "payoff"]
GameType.provides_information_state_string = True
GameType.provides_information_state_tensor = True
GameType.provides_observation_string = True
GameType.provides_observation_tensor = True
GameType.provides_factored_observation_string = False
GameType.reward_model = RewardModel.TERMINAL
GameType.short_name = "tiny_hanabi"
GameType.utility = Utility.IDENTICAL

NumDistinctActions() = 3
PolicyTensorShape() = [3]
MaxChanceOutcomes() = 2
GetParameters() = {num_actions=3,num_chance=2,num_players=2,payoff=10;0;0;4;8;4;10;0;0;0;0;10;4;8;4;0;0;10;0;0;10;4;8;4;0;0;0;10;0;0;4;8;4;10;0;0}
NumPlayers() = 2
MinUtility() = 0.0
MaxUtility() = 10.0
UtilitySum() = None
InformationStateTensorShape() = [8]
InformationStateTensorLayout() = TensorLayout.CHW
InformationStateTensorSize() = 8
ObservationTensorShape() = [8]
ObservationTensorLayout() = TensorLayout.CHW
ObservationTensorSize() = 8
MaxGameLength() = 2
ToString() = "tiny_hanabi()"

# State 0
IsTerminal() = False
History() = []
HistoryString() = ""
IsChanceNode() = True
IsSimultaneousNode() = False
CurrentPlayer() = -1
InformationStateString(0) = "p0"
InformationStateString(1) = "p1"
InformationStateTensor(0): ◯◯◯◯◯◯◯◯
InformationStateTensor(1): ◯◯◯◯◯◯◯◯
ObservationString(0) = "p0"
ObservationString(1) = "p1"
ObservationTensor(0): ◯◯◯◯◯◯◯◯
ObservationTensor(1): ◯◯◯◯◯◯◯◯
ChanceOutcomes() = [{0, 0.500000000000}, {1, 0.500000000000}]
LegalActions() = [0, 1]
StringLegalActions() = ["d0", "d1"]

# Apply action "d1"
action: 1

# State 1
# p0:d1
IsTerminal() = False
History() = [1]
HistoryString() = "1"
IsChanceNode() = True
IsSimultaneousNode() = False
CurrentPlayer() = -1
InformationStateString(0) = "p0:d1"
InformationStateString(1) = "p1"
InformationStateTensor(0): ◯◉◯◯◯◯◯◯
InformationStateTensor(1): ◯◯◯◯◯◯◯◯
ObservationString(0) = "p0:d1"
ObservationString(1) = "p1"
ObservationTensor(0): ◯◉◯◯◯◯◯◯
ObservationTensor(1): ◯◯◯◯◯◯◯◯
ChanceOutcomes() = [{0, 0.500000000000}, {1, 0.500000000000}]
LegalActions() = [0, 1]
StringLegalActions() = ["d0", "d1"]

# Apply action "d1"
action: 1

# State 2
# p0:d1 p1:d1
IsTerminal() = False
History() = [1, 1]
HistoryString() = "1, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "p0:d1"
InformationStateString(1) = "p1:d1"
InformationStateTensor(0): ◯◉◯◯◯◯◯◯
InformationStateTensor(1): ◯◉◯◯◯◯◯◯
ObservationString(0) = "p0:d1"
ObservationString(1) = "p1:d1"
ObservationTensor(0): ◯◉◯◯◯◯◯◯
ObservationTensor(1): ◯◉◯◯◯◯◯◯
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [0, 1, 2]
StringLegalActions() = ["p0a0", "p0a1", "p0a2"]

# Apply action "p0a2"
action: 2

# State 3
# p0:d1 p1:d1 p0:a2
IsTerminal() = False
History() = [1, 1, 2]
HistoryString() = "1, 1, 2"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "p0:d1 p0:a2"
InformationStateString(1) = "p1:d1 p0:a2"
InformationStateTensor(0): ◯◉◯◯◉◯◯◯
InformationStateTensor(1): ◯◉◯◯◉◯◯◯
ObservationString(0) = "p0:d1 p0:a2"
ObservationString(1) = "p1:d1 p0:a2"
ObservationTensor(0): ◯◉◯◯◉◯◯◯
ObservationTensor(1): ◯◉◯◯◉◯◯◯
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [0, 1, 2]
StringLegalActions() = ["p1a0", "p1a1", "p1a2"]

# Apply action "p1a0"
action: 0

# State 4
# p0:d1 p1:d1 p0:a2 p1:a0
IsTerminal() = True
History() = [1, 1, 2, 0]
HistoryString() = "1, 1, 2, 0"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = -4
InformationStateString(0) = "p0:d1 p0:a2 p1:a0"
InformationStateString(1) = "p1:d1 p0:a2 p1:a0"
InformationStateTensor(0): ◯◉◯◯◉◉◯◯
InformationStateTensor(1): ◯◉◯◯◉◉◯◯
ObservationString(0) = "p0:d1 p0:a2 p1:a0"
ObservationString(1) = "p1:d1 p0:a2 p1:a0"
ObservationTensor(0): ◯◉◯◯◉◉◯◯
ObservationTensor(1): ◯◉◯◯◉◉◯◯
Rewards() = [10.0, 10.0]
Returns() = [10.0, 10.0]
