import numpy as np

class AnonymousBandits:
    """Simulated multi-user Bernoulli bandit with group-level feedback.

    Each of the ``N`` users has a mean reward ``means[user][arm]`` for each
    of the ``K`` arms.  In every round the caller assigns groups of users to
    arms; the environment answers, per group, with the sum of the sampled
    0/1 rewards, or with ``None`` when feedback is anonymized and the group
    has fewer than ``C`` users.

    Utility and regret are tracked in expectation (using the means), not
    from the sampled rewards.
    """

    def __init__(self, means, C, T):
        """Set up the environment.

        Args:
            means: Indexable ``means[user][arm]`` of Bernoulli success
                probabilities; must contain at least one user row.
            C: Minimum group size for which aggregate feedback is revealed
                when feedback is anonymized.
            T: Number of rounds after which ``is_running`` returns False.
        """
        self.means = means
        self.N = len(means)
        self.K = len(means[0])
        self.C = C
        self.compute_U()

        self.cur_round = 0
        self.T = T

        self.is_anonymous = True

        self.total_utility = 0.
        # Best achievable expected utility in one round: every user gets
        # the arm with their own highest mean.
        self.opt_util_per_round = sum(max(user_means) for user_means in self.means)

        # history[t] is the cumulative expected utility after t rounds.
        self.history = [0.]

    def compute_U(self):
        """Store in ``self.U`` the size of the smallest best-arm user set.

        Users are bucketed by the arm with their highest mean (first index
        on ties, as given by ``np.argmax``); ``U`` is the smallest bucket
        size over all ``K`` arms, which is 0 if some arm is nobody's best.
        """
        best_arm_counts = [0] * self.K

        for user_means in self.means:
            best_arm_counts[np.argmax(user_means)] += 1

        self.U = min(best_arm_counts)

    def sample_user_arm(self, user, arm):
        """Draw one Bernoulli reward (1.0 or 0.0) for ``user`` pulling ``arm``."""
        return 1.0 if np.random.random() < self.means[user][arm] else 0.0

    def do_round(self, groups, arms, anonymize_feedback=True):
        """Play one round and return the per-group feedback.

        Args:
            groups: Sequence of groups, each an iterable of user indices.
            arms: Sequence of arm indices; ``arms[i]`` is played by every
                user in ``groups[i]``.
            anonymize_feedback: When True, groups with fewer than ``C``
                users receive ``None`` instead of their summed reward.

        Returns:
            A list with one entry per group: the sum of the sampled
            rewards of its users, or ``None`` if the feedback was withheld.

        Raises:
            ValueError: If ``groups`` and ``arms`` differ in length.
        """
        # Explicit check instead of ``assert``: asserts are removed under
        # ``python -O`` and ``zip`` would then silently drop the surplus
        # groups or arms.  Raised before any state is modified.
        if len(groups) != len(arms):
            raise ValueError(
                "groups and arms must have the same length "
                f"(got {len(groups)} and {len(arms)})"
            )

        feedback = []

        round_utility = 0.
        for group, arm in zip(groups, arms):
            group_utility = 0.
            for user in group:
                group_utility += self.sample_user_arm(user, arm)
                # Utility is accumulated from the means, not the sample,
                # so the recorded history is free of sampling noise.
                round_utility += self.means[user][arm]

            if anonymize_feedback and len(group) < self.C:
                feedback.append(None)
            else:
                feedback.append(group_utility)

        self.cur_round += 1
        self.total_utility += round_utility
        self.history.append(self.total_utility)

        return feedback

    def regret_timeseries(self):
        """Return cumulative expected regret after 0, 1, ..., cur_round rounds.

        Entry ``t`` is ``t * opt_util_per_round - history[t]``.
        """
        return [
            t * self.opt_util_per_round - cum_reward
            for t, cum_reward in enumerate(self.history)
        ]

    def is_running(self):
        """Return True while fewer than ``T`` rounds have been played."""
        return self.cur_round < self.T

def generate_means(N, K):
    """Return an N-by-K nested list of means drawn uniformly from [0, 1).

    Rows correspond to users and columns to arms; values are drawn one at
    a time with ``np.random.random()`` in row-major order.
    """
    table = []
    for _user in range(N):
        row = []
        for _arm in range(K):
            row.append(np.random.random())
        table.append(row)
    return table

def generate_means_linear_model(N, K, dim=10):
    """Build an N-by-K nested list of means from random latent vectors.

    Every user and every arm gets a ``dim``-dimensional vector with
    independent standard-normal entries (all user vectors are drawn first,
    then all arm vectors).  The mean for a (user, arm) pair is the cosine
    similarity of their vectors, mapped from [-1, 1] to [0, 1] via
    ``0.5 * (1 + similarity)``.
    """
    def draw_vectors(count):
        # One np.random.normal() call per coordinate, vector by vector.
        vectors = []
        for _ in range(count):
            vectors.append([np.random.normal() for _ in range(dim)])
        return vectors

    def cosine_similarity(a, b):
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    user_vecs = draw_vectors(N)
    arm_vecs = draw_vectors(K)

    means = []
    for user in user_vecs:
        row = []
        for arm in arm_vecs:
            row.append(0.5 * (1 + cosine_similarity(user, arm)))
        means.append(row)
    return means