from BaseEnv import ENV
import numpy as np


class ThetaTwoTheta(ENV):
    """Two-state environment with scalar features 1 and 2.

    The environment has two states, ``THETA_STATE`` and ``TWOTHETA_STATE``,
    and a single action.  Every transition leads to ``TWOTHETA_STATE`` and
    the episode never terminates (``done`` is always ``False``).  The reward
    is ``0`` unless noise is enabled, in which case it is drawn from a
    zero-mean normal distribution.

    NOTE(review): the name suggests the classic "theta -> 2*theta" setup in
    which a single weight is multiplied by the state feature -- confirm
    against the agents that consume ``self.features``.
    """

    NUM_STATES = 2
    NUM_ACTIONS = 1
    NUM_FEATURES = 1

    # Integer ids of the two states; also used as indices into the state
    # axis of ``self.features``.
    THETA_STATE = 0
    TWOTHETA_STATE = 1

    # Not referenced inside this class.  The (misspelled) name is kept as-is
    # because it is a public attribute other modules may read.
    EPISLON_TERMINATION = 0.01
    WINDOW_SIZE = 1

    def __init__(self, normalize_feature, isNoisy,noise):
        """Build the environment and its feature table.

        Args:
            normalize_feature: If truthy, the feature values are halved
                (see ``init_feature``).
            isNoisy: If truthy, ``step`` returns a random reward instead
                of ``0``.
            noise: Standard deviation of the reward noise, passed as the
                ``scale`` argument of ``np.random.normal``.
        """
        super().__init__("ThetaTwoTheta")

        self.reward = 0
        # Stays None until reset() picks a start state.
        self.current_state = None
        # Feature table indexed as [action, state, feature].
        self.features = np.zeros((self.NUM_ACTIONS, self.NUM_STATES, self.NUM_FEATURES))
        self.init_feature(normalize_feature)
        self.isNoisy = isNoisy
        self.noise = noise

    def init_feature(self, normalize_feature):
        """Fill ``self.features`` in place.

        ``THETA_STATE`` gets feature value 1 and ``TWOTHETA_STATE`` gets 2
        for every action.  When ``normalize_feature`` is truthy the whole
        table is divided by 2, giving 0.5 and 1.0 respectively.
        """
        self.features[:, self.THETA_STATE, :] = 1
        self.features[:, self.TWOTHETA_STATE, :] = 2

        if normalize_feature:
            self.features /= 2

    def reset(self):
        """Sample a start state uniformly from all states and return it."""
        # The start state is random rather than always THETA_STATE.
        self.current_state = np.random.randint(0, self.NUM_STATES)
        return self.current_state

    def step(self, action):
        """Advance the environment by one transition.

        Args:
            action: Ignored; the transition does not depend on it.

        Returns:
            A list ``[next_state, reward, done]``.  ``next_state`` is always
            ``TWOTHETA_STATE`` and ``done`` is always ``False``.  ``reward``
            is the scalar ``0`` when noise is disabled, otherwise a NumPy
            array of shape ``(1,)`` sampled from ``N(0, noise)``.

        Raises:
            RuntimeError: If ``current_state`` is not one of the two valid
                states, e.g. because ``reset()`` has not been called yet.
        """
        # Fail with a clear message instead of an UnboundLocalError on an
        # unassigned ``next_state`` when the state was never initialised.
        if self.current_state not in (self.THETA_STATE, self.TWOTHETA_STATE):
            raise RuntimeError(
                "Invalid current_state %r; call reset() before step()."
                % (self.current_state,)
            )

        reward = np.random.normal(0, self.noise, 1) if self.isNoisy else 0
        # The episode never ends; EPISLON_TERMINATION is currently unused.
        done = False

        # THETA_STATE moves to TWOTHETA_STATE, and TWOTHETA_STATE loops back
        # onto itself, so the successor is the same from either state.
        next_state = self.TWOTHETA_STATE
        self.current_state = next_state

        return [next_state, reward, done]