#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <random>
#include <utility>
#include <vector>

#include "InfInt/InfInt.h"

// Program-wide parameters. All of them are assigned in main before any of
// the functions below run. Note that some functions take parameters with the
// same names (e.g. the N parameter of Dataset::F_bar and the Q parameter of
// greedy); inside those functions the parameter shadows the global.
long double epsilon; // Error tolerance (parsed from the command line)
long long D; // Constant from Definition 4.7 (computed in main from Q, N and the kappa thresholds)

size_t H;  // Number of hypotheses
size_t NC; // Number of clusters
size_t SC; // Size of clusters (number of queries per cluster)
size_t S;  // Specificity: number of hypotheses interested in each cluster
size_t T;  // Target interests: number of clusters target interested in
size_t Q;  // Number of queries (set to NC * SC in main)

size_t N;  // Number of thresholds

using namespace std;

// Integer exponentiation by repeated multiplication.
// Returns base raised to exp; any exp <= 0 yields 1.
// Performs O(exp) multiplications and does not check for overflow.
int power(const int &base, const int &exp)
{
  int product = 1;
  int remaining = exp;
  while (remaining > 0) {
    product *= base;
    --remaining;
  }
  return product;
}

// Doubly truncated value used by the hypothesis and distance functions
// (F and G): a is capped from above at b, raised to at least c, and the
// result is reported relative to c.
InfInt truncate(const InfInt &a, const InfInt &b, const InfInt &c)
{
  // upper truncation: the smaller of a and b
  InfInt capped = (b < a) ? b : a;
  // lower truncation: never fall below c
  InfInt clamped = (capped < c) ? c : capped;
  return clamped - c;
}

// Returns the smallest integer i such that i * step >= v, as an InfInt
// (up to the floating-point rounding of the quotient v / step).
//   v    - value to be expressed in units of step
//   step - unit size; must be non-zero
// The quotient must fit in a long long.
InfInt inline ceil(long double v, long double step)
{
  // std::ceil already returns an exactly integral value, so it can be
  // converted directly. The previous "+ 0.1" before the cast changed nothing
  // for non-negative quotients but, because the cast truncates toward zero,
  // made every negative result one too large (e.g. -3 became -2).
  const long double units = std::ceil(v / step);
  return InfInt(static_cast<long long>(units));
}

// Returns the largest integer i such that i * step <= v, as an InfInt
// (up to the floating-point rounding of the quotient v / step).
// Note the bound is inclusive: when v is an exact multiple of step, that
// multiple itself is returned.
//   v    - value to be expressed in units of step
//   step - unit size; must be non-zero
// The quotient must fit in a long long.
InfInt inline floor(long double v, long double step)
{
  // std::floor already returns an exactly integral value, so it can be
  // converted directly. The previous "+ 0.1" before the cast changed nothing
  // for non-negative quotients but, because the cast truncates toward zero,
  // made every negative result one too large (e.g. -3 became -2).
  const long double units = std::floor(v / step);
  return InfInt(static_cast<long long>(units));
}

// Utility function
// Sum of square root of size of each cluster
// response is not actually used
long double inline F_prime(const vector<int> &response_to_query, const vector<int> &query, const vector<int> &response)
{
  vector<int> count(NC, 0);
  for (size_t i = 0; i < query.size(); i++) {
    if (response_to_query[query[i]] == 1) {
      count[query[i] / SC]++;
    }
  }

  long double ans = 0;
  for (size_t i = 0; i < SC; i++) {
    ans += sqrt(count[i]);
  }
  return ans;
}

// Distance function
// Returns the number of asked queries on which the hypothesis' response
// differs from the observed response.
//   response_to_query - the hypothesis' response to every query
//   query             - indices of the queries asked so far
//   response          - observed responses; response[i] answers query[i]
//
// The previous version tallied the mismatches per cluster and then summed
// the tallies, but the summing loop ran up to SC (the cluster size) over a
// vector of NC entries, so it read out of bounds when SC > NC and dropped
// clusters when SC < NC. Summing over all clusters is the same as counting
// the mismatches directly, which is what is done here.
long long inline G_prime(const std::vector<int> &response_to_query, const std::vector<int> &query, const std::vector<int> &response)
{
  long long mismatches = 0;
  for (std::size_t i = 0; i < query.size(); i++) {
    if (response_to_query[query[i]] != response[i]) {
      mismatches++;
    }
  }
  return mismatches;
}

// A hypothesis, described by the response it gives to every query.
class Hypothesis
{
public:
  // response_to_query[q] is this hypothesis' response to query q
  vector<int> response_to_query;

  Hypothesis()
  {
  }

  Hypothesis(vector<int> response_to_query)
    : response_to_query(response_to_query)
  {
  }

  // Integer-valued utility: conversion of F_prime from Definition 4.7.
  // Adds an increment that depends only on the number of queries asked and
  // expresses the sum in whole units of epsilon / D, rounded up.
  InfInt F(const vector<int> &query, const vector<int> &response) const
  {
    const size_t asked = query.size();
    const long double utility = F_prime(response_to_query, query, response);
    const long double bonus = epsilon / D * (asked * (long double) Q - asked * (long double) (asked - 1) / 2);
    return ceil(utility + bonus, epsilon / D);
  }

  // Integer-valued distance. G_prime is already an integer, so it is only
  // wrapped in an InfInt.
  InfInt G(const vector<int> &query, const vector<int> &response) const
  {
    return InfInt(G_prime(response_to_query, query, response));
  }
};

// The set of candidate hypotheses, plus the combined objective F_bar that
// the greedy algorithm maximises.
class Dataset
{
public:
  vector<Hypothesis> hypothesis;

  Dataset()
  {
  }

  Dataset(vector<Hypothesis> hypothesis)
    : hypothesis(hypothesis)
  {
  }

  // Utility F of hypothesis number h
  InfInt inline F(const int &h, const vector<int> &query, const vector<int> &response) const
  {
    const Hypothesis &chosen = hypothesis[h];
    return chosen.F(query, response);
  }

  // Distance G of hypothesis number h
  InfInt inline G(const int &h, const vector<int> &query, const vector<int> &response) const
  {
    const Hypothesis &chosen = hypothesis[h];
    return chosen.G(query, response);
  }

  // Definition 4.3
  // alpha and kappa use the paper's one-based indexing, so index 0 of each
  // is padding; alpha[N + 1] is read as well, so alpha needs N + 2 entries
  // and kappa needs N + 1.
  InfInt F_bar(const vector<int> &query, const vector<int> &response, const vector<InfInt> &alpha, const vector<InfInt> &kappa, const size_t &N) const
  {
    // Product of all N gaps kappa_n - kappa_{n-1}
    InfInt all_gaps = 1;
    for (size_t n = 1; n <= N; n++) {
      all_gaps *= (kappa[n] - kappa[n - 1]);
    }

    // weight[n] = product of every gap except the n-th one
    vector<InfInt> weight(N + 1, all_gaps);
    for (size_t n = 1; n <= N; n++) {
      weight[n] /= (kappa[n] - kappa[n - 1]);
    }

    InfInt total = 0;
    const size_t hypotheses = hypothesis.size();
    for (size_t h = 0; h < hypotheses; h++) {
      InfInt Fh = F(h, query, response);
      InfInt Gh = G(h, query, response);
      for (size_t n = 1; n <= N; n++) {
        InfInt gap = kappa[n] - kappa[n - 1];
        InfInt g_part = truncate(Gh, kappa[n], kappa[n - 1]);
        InfInt f_part = truncate(Fh, alpha[n], alpha[n + 1]);
        total += weight[n] * ((gap - g_part) * f_part + g_part * (alpha[n] - alpha[n + 1]));
      }
    }
    return total;
  }
};

// Structure to hold the results of the greedy algorithm (Algorithm 1)
//   query    - indices of the queries asked, in the order they were asked
//   response - response[i] is the response received for query[i]
//   cost     - cost reported by the algorithm (greedy uses -1 to signal that
//              it ran out of queries)
class GreedyResult
{
public:
  std::vector<int> query;
  std::vector<int> response;
  long double cost;

  // Empty result: no queries, no responses, zero cost.
  // (cost used to be left uninitialised here, so reading it was undefined.)
  GreedyResult()
    : cost(0)
  {
  }

  // The vectors are taken by value and moved into place to avoid copying
  // them a second time.
  GreedyResult(std::vector<int> query, std::vector<int> response, long double cost)
    : query(std::move(query)), response(std::move(response)), cost(cost)
  {
  }
};

// Runs Algorithm 1
// Repeatedly asks the unused query whose worst-case response (over the R
// possible responses) increases F_bar the most, records the target's actual
// response, and stops once F_bar reaches its maximum value.
//
// alpha must carry one padding zero at the front and one at the back, kappa
// one padding zero at the front: the front entry shifts to the one-based
// indexing of the paper, and the back entry of alpha is needed because
// alpha_{N+1} is read.
// query / response may hold queries that were already asked.
// Every query has unit cost. The returned cost is the number of queries
// asked by this call, or -1 if the queries ran out before F_bar reached its
// maximum.
GreedyResult greedy(const Dataset &dataset, vector<InfInt> alpha, vector<InfInt> kappa, const Hypothesis &target, const int &Q, const int &R, vector<int> query = vector<int>(), vector<int> response = vector<int>())
{
  // Queries handed in by the caller count as already asked
  vector<bool> asked(Q, false);
  for (size_t k = 0; k < query.size(); k++) {
    asked[query[k]] = true;
  }

  const size_t N = alpha.size() - 2;
  assert(kappa.size() - 1 == N);

  // Stopping value: |H| * alpha_1 * product of all kappa gaps
  InfInt goal = InfInt(dataset.hypothesis.size()) * alpha[1];
  for (size_t n = 1; n <= N; n++) {
    goal *= (kappa[n] - kappa[n - 1]);
  }

  long double spent = 0;
  InfInt current = dataset.F_bar(query, response, alpha, kappa, N);
  while (current < goal) {
    // History extended by one slot; the slot is overwritten for every
    // candidate query and every possible response below.
    vector<int> trial_query = query;
    vector<int> trial_response = response;
    trial_query.push_back(-1);
    trial_response.push_back(-1);

    int pick = -1;
    InfInt pick_gain = -1;
    for (int candidate = 0; candidate < Q; candidate++) {
      if (asked[candidate]) {
        continue;
      }
      trial_query.back() = candidate;

      // Worst-case gain of this candidate over all responses
      trial_response.back() = 0;
      InfInt worst = dataset.F_bar(trial_query, trial_response, alpha, kappa, N) - current;
      for (int r = 1; r < R; r++) {
        trial_response.back() = r;
        InfInt gain = dataset.F_bar(trial_query, trial_response, alpha, kappa, N) - current;
        if (gain < worst) {
          worst = gain;
        }
      }

      // Strict comparison: ties keep the lowest-numbered candidate
      if (worst > pick_gain) {
        pick = candidate;
        pick_gain = worst;
      }
    }

    // No unused query left
    if (pick == -1) {
      return GreedyResult(query, response, -1);
    }

    asked[pick] = true;
    query.push_back(pick);
    response.push_back(target.response_to_query[pick]);
    current = dataset.F_bar(query, response, alpha, kappa, N);

    spent += 1;
  }

  return GreedyResult(query, response, spent);
}


int main(int argc, char *argv[])
{
  if (argc != 9) {
    cout << "Usage: " << argv[0] << " <|H|> <Clusters> <Size of Cluster> <Specificity> <Target Interests> <epsilon> <N> <Seed>\n";
    return 0;
  }

  H  = atoi(argv[1]); // Number of hypotheses
  NC = atoi(argv[2]); // Number of clusters
  SC = atoi(argv[3]); // Size of clusters
  S  = atoi(argv[4]); // Specificity: number of hypotheses interested in each cluster
  T  = atoi(argv[5]); // Target interests: number of clusters target interested in
  epsilon  = atof(argv[6]);
  N = atoi(argv[7]); // number of thresholds
  Q  = NC * SC;     ; // number of queries

  srand(atoi(argv[8])); // Seed random number generator

  // indices 0 => C - 1 are cluster 1, C => 2 * C - 1 are cluster two, ...
  vector<int> response_to_query(Q, 0);
  vector<Hypothesis> hypothesis(H, Hypothesis(response_to_query));

  for (size_t c = 0; c < NC; c++) { // select for each cluster
    for (size_t s = 0; s < S; s++) { // S hypotheses are needed for each cluster
      while (true) { // pick random hypotheses until one that wasn't used yet is found
        int h = rand() % H;
        if (hypothesis[h].response_to_query[SC * c] == 0) {
          for (size_t i = 0; i < SC; i++) {
            hypothesis[h].response_to_query[SC * c + i] = 1;
          }
          break;
        }
      }
    }
  }

  vector<int> TI(Q, 0); // target interest
  for (size_t i = 0; i < T; i++) { // number of clusters to be interested in
    while (true) { // pick random clusters until a new one is found
      int c = rand() % NC;
      if (TI[SC * c] == 0) {
        for (size_t j = 0; j < SC; j++) {
          TI[SC * c + j] = 1;
        }
        break;
      }
    }
  }

  const Dataset dataset(hypothesis);
  vector<long long> alpha_p;
  vector<long long> kappa_p;
  // Thresholds are (1, N), (2, N - 1), ..., (N, 1)
  for (size_t n = 0; n < N; n++) {
    alpha_p.push_back(N - n);
    kappa_p.push_back(n + 1);
  }
  alpha_p.insert(alpha_p.begin(), 0); // not used, just shifts to 1-indexing
  alpha_p.push_back(0); // alpha_{N+1} is sometimes used
  kappa_p.insert(kappa_p.begin(), 0); // kappa_0 is sometimes used


  // From Definition 4.7
  // D_G = 1
  D = Q * (Q + 1) / 2 + 2;
  for (size_t i = 1; i <= N; i++) {
    long long temp = 1;
    for (size_t j = i; j <= N; j++) {
      temp *= (kappa_p[j] - kappa_p[j - 1]);
    }
    D += (2 * N - 2 * i) * temp;
  }

  vector<InfInt> alpha;
  vector<InfInt> kappa;
  alpha.push_back(0);
  kappa.push_back(0);
  for (size_t n = 1; n <= N; n++) {
    // Convert thresholds based on Definition 4.7
    long double ap = alpha_p[n];
    for (size_t i = 1; i <= n; i++) {
      long double temp = 1;
      for (size_t j = i - 1; j <= n; j++) {
        temp *= (kappa_p[j] - kappa_p[j - 1]);
      }
      ap -= epsilon / D * (2 * N - 2 * i) * temp;
    }

    InfInt a = floor(ap, epsilon / D);
    alpha.push_back(a);

    // kappas are unmodified
    kappa.push_back(InfInt(kappa_p[n]));
  }
  alpha.push_back(0);


  const Hypothesis target(TI);
  const int R = 2; // number of responses (just "yes" or "no")

  // Run greedy algorithm
  GreedyResult result = greedy(dataset, alpha, kappa, target, Q, R);

  // Print cost
  cout << result.cost << "\n";

  // Print utility and distance of each hypothesis
  for (size_t h = 0; h < H; h++) {
    cout << F_prime(hypothesis[h].response_to_query, result.query, result.response) << "\t";
    cout << G_prime(hypothesis[h].response_to_query, result.query, result.response) << "\n";
  }
}

