import tensorflow as tf

"""
    The hyper parameters of the ppo algorithm,
    also used for ppo-pbrs
"""
# Plain dict of PPO settings, keyed by option name.
ppo_hyper_params = {
    # presumably the discount factor and the GAE lambda -- confirm with the trainer
    "gamma": 0.999,
    "lmda": 0.95,
    "truncation_size": 20000,

    # learning rates
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,

    # gradient clipping: boolean switches first, then the norm limits
    # (presumably a limit is only used when its switch is True -- confirm)
    "actor_gradient_clip": False,
    "critic_gradient_clip": False,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,

    # loss and optimisation-loop settings
    "entropy_coeff": 0.0,
    "ratio_clip_param": 0.5,
    "adam_epsilon": 1e-5,
    "optim_epochs": 50,
    "optim_batch_size": 1024,

    # network layouts (presumably hidden-layer widths -- confirm)
    "policy_net_layers": [8, 8],
    "v_net_layers": [32, 32],
    "gaussian_fixed_var": False,
}

"""
    the hyper parameters of the ppo-dpba algorithm
"""
# Plain dict of PPO-DPBA settings, keyed by option name.
ppo_dpba_hyper_params = {
    # presumably the discount factor and the GAE lambda -- confirm with the trainer
    "gamma": 0.999,
    "lmda": 0.95,
    "truncation_size": 20000,

    # learning rates (actor, critic, and the "phi" network)
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_phi": 5e-4,

    # gradient clipping: boolean switches first, then the norm limits
    # (presumably a limit is only used when its switch is True -- confirm)
    "actor_gradient_clip": False,
    "critic_gradient_clip": False,
    "phi_gradient_clip": False,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "phi_gradient_norm_clip": 50.0,

    # loss and optimisation-loop settings
    "entropy_coeff": 0.0,
    "ratio_clip_param": 0.5,
    "adam_epsilon": 1e-5,
    "optim_epochs": 50,
    "optim_batch_size": 1024,

    # network layouts (presumably hidden-layer widths -- confirm)
    "policy_net_layers": [8, 8],
    "v_net_layers": [32, 32],
    "phi_net_layers": [16, 8],
    "gaussian_fixed_var": False,

    # DPBA-specific extras; the meaning of "tau" is not visible here -- TODO confirm
    "tau": 0.01,
    "phi_hidden_layer_act_func": tf.nn.tanh,
}

"""
    the hyper parameters of the ppo-oprs-v1-fop algorithm
"""
# Plain dict of PPO-OPRS v1 settings, keyed by option name.
ppo_oprs_v1_hyper_params = {
    # presumably the discount factor and the GAE lambda -- confirm with the trainer
    "gamma": 0.999,
    "lmda": 0.95,
    "truncation_size": 20000,
    "update_num_per_switch": 1,

    # learning rates (actor, critic, and the "f" network)
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 5e-4,

    # gradient clipping: boolean switches first, then the norm limits
    # (presumably a limit is only used when its switch is True -- confirm)
    "actor_gradient_clip": False,
    "critic_gradient_clip": False,
    "f_gradient_clip": False,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0,

    # loss and optimisation-loop settings
    "entropy_coeff": 0.0,
    "ratio_clip_param": 0.5,
    "adam_epsilon": 1e-5,
    "optim_epochs": 50,
    "optim_batch_size": 1024,

    # network layouts (presumably hidden-layer widths -- confirm)
    "policy_net_layers": [8, 8],
    "v_net_layers": [32, 32],
    "f_net_layers": [16, 8],
    "gaussian_fixed_var": False,

    # OPRS-specific switches
    "joint_opt": False,
    # NOTE(review): the original line carried a commented-out "False" next to
    # this value; which setting is intended should be confirmed.
    "use_f_old": True,
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
}

"""
    the hyper parameters of the ppo-oprs-v2-fop algorithm
"""
# Plain dict of PPO-OPRS v2 settings, keyed by option name.
ppo_oprs_v2_hyper_params = {
    # presumably the discount factor and the GAE lambda -- confirm with the trainer
    "gamma": 0.999,
    "lmda": 0.95,
    "truncation_size": 20000,
    "update_num_per_switch": 1,
    # looks like a sample count ("sam_num") -- TODO confirm what is being sampled
    "nabla_theta_wrt_phi_sam_num": 100,

    # learning rates (actor, critic, and the "f" network)
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 5e-4,

    # gradient clipping: boolean switches first, then the norm limits
    # (presumably a limit is only used when its switch is True -- confirm)
    "actor_gradient_clip": False,
    "critic_gradient_clip": False,
    "f_gradient_clip": False,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 10.0,

    # loss and optimisation-loop settings
    "entropy_coeff": 0.0,
    "ratio_clip_param": 0.5,
    "adam_epsilon": 1e-5,
    "optim_epochs": 50,
    "optim_batch_size": 1024,

    # network layouts (presumably hidden-layer widths -- confirm)
    "policy_net_layers": [8, 8],
    "v_net_layers": [32, 32],
    "f_net_layers": [16, 8],
    "gaussian_fixed_var": False,

    # OPRS-specific switches
    "joint_opt": False,
    "use_f_old": False,
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
}

"""
    the hyper parameters of the ppo-oprs-v3 algorithm
"""
# Plain dict of PPO-OPRS v3 settings, keyed by option name.
ppo_oprs_v3_hyper_params = {
    # presumably the discount factor and the GAE lambda -- confirm with the trainer
    "gamma": 0.999,
    "lmda": 0.95,
    "truncation_size": 20000,
    "update_num_per_switch": 1,
    # looks like a sample count ("sam_num") -- TODO confirm what is being sampled
    "nabla_theta_wrt_phi_sam_num": 100,

    # learning rates; what "h" refers to is not visible here -- TODO confirm
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 5e-4,
    "lr_h": 1e-3,

    # gradient clipping: boolean switches first, then the norm limits
    # (presumably a limit is only used when its switch is True -- confirm)
    "actor_gradient_clip": False,
    "critic_gradient_clip": False,
    "f_gradient_clip": False,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 10.0,

    # loss and optimisation-loop settings
    "entropy_coeff": 0.0,
    "ratio_clip_param": 0.5,
    "adam_epsilon": 1e-5,
    "optim_epochs": 50,
    "optim_batch_size": 1024,

    # network layouts (presumably hidden-layer widths -- confirm)
    "policy_net_layers": [8, 8],
    "v_net_layers": [32, 32],
    "f_net_layers": [16, 8],
    "gaussian_fixed_var": False,

    # OPRS-specific switches, including the Hessian-related flags
    "joint_opt": False,
    "use_f_old": False,
    "enable_hessian_computing": True,
    "hessian_opg_approx": False,
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
}