# Start from a clean workspace: removes every object listed by ls() (names
# starting with "." are not listed) from the environment in which this script
# is evaluated, so nothing left over from an earlier session is picked up below.
rm(list = ls())
library(parallel)
library(purrr)
library(tidyr)
library(dplyr)
library(igraph) 
library(rosqp)
library(Matrix)
library(data.table)
library(ggplot2)
library(gridExtra)
library(rstudioapi)

# Make the directory that contains this script the working directory, so that
# the relative paths used below ("functions", "data/...", "plots/...") resolve.
# getActiveDocumentContext() only works inside RStudio, so ask rstudioapi first.
# Outside RStudio (e.g. Rscript), or for an unsaved document whose path is "",
# keep the current working directory instead of failing.
current_path <- if (rstudioapi::isAvailable()) getActiveDocumentContext()$path else ""
if (isTRUE(nzchar(current_path))) {
  setwd(dirname(current_path))
} else {
  message("Script location unknown (not running in RStudio, or file not saved); ",
          "keeping the current working directory.")
}
# you can make sure you are in the right directory
print(getwd())

# Load all helper functions: source every *.R file found in ./functions
# (relative to the working directory).
functionFiles <- list.files(pattern = "[.]R$", path = "functions", full.names = TRUE)
# Fail early and clearly: with no files nothing would be sourced and the script
# would only break later with "could not find function ...".
if (length(functionFiles) == 0) {
  stop("No .R files found in '", file.path(getwd(), "functions"),
       "'; check the working directory.", call. = FALSE)
}
# sapply() is kept so that allFunctions has the same shape as before; the guard
# above rules out the empty-input case in which sapply() changes its return type.
allFunctions <- sapply(functionFiles, source)

################################################################################################################################
####################################################### Simulation Parameters #######################################################
################################################################################################################################
# Number of worker processes for parallel computations: leave one core free,
# but never go below 1. detectCores() can return NA (count unknown) or 1
# (single-core machine), and mclapply() requires mc.cores >= 1.
nCores <- max(1, detectCores() - 1, na.rm = TRUE)
# model parameters
seedForNetwork <- 1234 # seed for generating network
nClusters <- 10 # number of cluster in the network
clusterSize  <- 5000 # number of nodes
avgDegree <- 21 # average degree in the network, use 21, 55, or 120
avgDegreeRatio <- 20 # the ratio of the within cluster average degree to the between cluster average degree, use 20, 10, or 2
power <- 1/4 # passed to GenerateNetwork(); its meaning is defined there
seedForModelParameters <- 456 # seed for generating strength and base level exposure
strengthLim <- c(10, 100) # unnormalized strength will be generated from a U(strengthLim[1], strengthLim[2]) distribution
exposureLim <- c(1, 2) # unnormalized base level exposure will be generated from a U(exposureLim[1], exposureLim[2]) distribution
betaSeq <- c(1) # beta values to simulate (one population / experiment data set per delta-beta pair)
deltaSeq <- c(0.25, 0.5, 1) # delta values to simulate
# Noise term for feedback: this configuration is noise-free, so the function
# returns the constant 0 whatever `producer` and `consumer` are.
feedbackNoise <- function(producer, consumer) 0
# Noise term for responses: one independent N(0, 1) draw per element of
# `producer` (only the length of `producer` is used).
responseNoise <- function(producer) {
  nDraws <- length(producer)
  rnorm(n = nDraws, mean = 0, sd = 1)
}
# treatment definition
# gamma values to simulate (passed to GeneratePopulation(); presumably the
# exponent `gamma` of Treatment() below -- confirm in GeneratePopulation)
gammaSeq <- c(0, 0.5)
# one label per gamma value, of the form "T(<gamma>)"
treatmentLevels <- paste0("T(", gammaSeq, ")")
# Treatment multiplier for producer-consumer edges:
#   (edgeStrength / log(1 + degree(producer) * degree(consumer)))^gamma
# Degrees are looked up in the global igraph object `network`.
# With the default gamma = 0 the exponentiation yields 1.
Treatment <- function(producer, consumer, edgeStrength, gamma = 0) {
  degreeProduct <- degree(network, producer) * degree(network, consumer)
  scaledStrength <- edgeStrength / log(1 + degreeProduct)
  scaledStrength^gamma
}
# seeds: seedForDesign is passed to GetExperiment() / GetClusterExperiment(),
# seedForExperimentData to GenerateExperimentData()
seedForDesign <- 999
seedForExperimentData <- 111
# design of experiment
nExperiments <- 500 # number of experiments
# bootstrap variance estimation
nBootIter <- 1000 # the number of bootstrap samples

# Output locations (relative to the working directory). Data and plots are
# both keyed by total network size and average degree.
dataDir <- paste0("data/networkSize=", clusterSize * nClusters, "/avgDegree=", avgDegree, "/")
plotsPath <- paste0("plots/networkSize=", clusterSize * nClusters, "/avgDegree=", avgDegree, "/")
# sub-directories of dataDir
modelPath <- paste0(dataDir, "populationData/")
designPath <- paste0(dataDir, "experimentSetup/")
experimentDataPath <- paste0(dataDir, "experimentData/")
estimatePath <- paste0(dataDir, "estimates/")
clusterBasedDesignPath <- paste0(dataDir, "clusterExperimentSetup/")
clusterBasedExperimentDataPath <- paste0(dataDir, "clusterExperimentData/")
clusterBasedEstimatePath <- paste0(dataDir, "clusterBasedEstimates/")

allPaths <- c(modelPath, designPath, experimentDataPath, estimatePath,
              clusterBasedDesignPath, clusterBasedExperimentDataPath,
              clusterBasedEstimatePath, plotsPath)

# Create (recursively) every directory that does not exist yet; the set of
# missing directories is determined once, before anything is created.
newPathsCreated <- lapply(
  Filter(Negate(dir.exists), allPaths),
  function(relativePath) {
    dir.create(paste0(getwd(), "/", relativePath), recursive = TRUE)
  }
)
cat(length(newPathsCreated), "new path(s) created \n")



################################################################################################################################
####################################################### Model Generation #######################################################
################################################################################################################################
# generate network with edge strength and base level exposure
# build the clustered network
network <- GenerateNetwork(
  clusterSize, nClusters, avgDegree, power, avgDegreeRatio, seedForNetwork
)
# draw edge strengths and base level exposures; the helper returns a list of
# row-bindable pieces that are stacked into a single tibble
modelParameters <- GenerateStrengthAndExposure(
  network, strengthLim, exposureLim, nCores, seedForModelParameters
)
modelParameters <- do.call("rbind", modelParameters) %>% as_tibble()
# population level data generation; the returned value is used below as a
# collection of paths to the saved population files
populationDataPath <- GeneratePopulation(
  network, modelParameters, gammaSeq, deltaSeq, betaSeq,
  outputPath = modelPath
)

################################################################################################################################
##################################################### Design of Experiment #####################################################
################################################################################################################################
# the design of experiment does not depend on delta or beta
# loading population data to get modelParametersWithTreatment
# Settings handed to GetExperiment(); both sample sizes are 20% of the number
# of nodes in the network.
experimentParameters <- list(producerSampleSize = 0.2 * length(V(network)),
                             consumerSampleSize = 0.2 * length(V(network)),
                             consumerForExposureMatchingProb = 0.5,
                             RLim = c(0, 10),
                             SLim = c(0.2, 5),
                             maxIterQP = 10,
                             groupSizeQP = 1000)
# Load one population file; the code below expects it to define
# `modelParametersWithTreatment` in the workspace.
load(populationDataPath[[1]])
treatmentSet <- as.vector(unique(modelParametersWithTreatment$treatment))
# Wide format: one treatmentLevelExposure column per treatment value.
networkWithTreatments <- modelParametersWithTreatment %>%
  select(consumer, producer, edgeStrength, baseLevelExposure, treatmentLevelExposure, treatment) %>%
  spread(key = treatment, value = treatmentLevelExposure)

stopifnot(is.numeric(nExperiments), nExperiments >= 1)
experimentIds <- seq_len(nExperiments)

# Build all designs in parallel. The results are kept in their own variable
# (instead of overwriting the designPath directory) so that worker failures,
# which mclapply() returns as "try-error" objects, can be detected.
system.time(designResults <- mclapply(experimentIds, function(i)
  GetExperiment(networkWithTreatments, treatmentSet, experimentParameters, experimentId = i,
                outputPath = designPath, seedForDesign),
  mc.cores = nCores))
failedDesigns <- vapply(designResults, inherits, logical(1), what = "try-error")
if (any(failedDesigns)) {
  stop("GetExperiment() failed in ", sum(failedDesigns), " of ", length(designResults),
       " parallel job(s); first error: ",
       as.character(designResults[[which(failedDesigns)[1]]]), call. = FALSE)
}
# From here on designPath holds the per-experiment design files (file names as
# GetExperiment() is assumed to write them -- confirm there).
designPath <- paste0(dataDir, "experimentSetup/design_ID=", experimentIds, ".Rdata")

# generate experiment data
system.time(experimentDataResults <- mclapply(experimentIds, function(i)
  GenerateExperimentData(i, designPath, deltaSeq, betaSeq,
                         outputPath = experimentDataPath, seedForExperimentData),
  mc.cores = nCores))
failedExperimentData <- vapply(experimentDataResults, inherits, logical(1), what = "try-error")
if (any(failedExperimentData)) {
  stop("GenerateExperimentData() failed in ", sum(failedExperimentData), " of ",
       length(experimentDataResults), " parallel job(s); first error: ",
       as.character(experimentDataResults[[which(failedExperimentData)[1]]]), call. = FALSE)
}
experimentData <- unlist(experimentDataResults)

################################################################################################################################
######################################### Estimation and Importance Sampling Adjustment ########################################
################################################################################################################################
# Path of the saved population file for a given (delta, beta) pair, located in
# the global directory `modelPath`.
GetPopulationPath <- function(delta, beta) {
  fileName <- paste0("population_delta=", delta, "_beta=", beta, ".Rdata")
  paste0(modelPath, fileName)
}
# Path of the saved experiment data for a given experiment id and
# (delta, beta) pair, located in the global directory `experimentDataPath`.
GetExperimentDataPath <- function(experimentId, delta, beta) {
  fileName <- paste0("experiment=", experimentId, "_delta=", delta, "_beta=", beta, ".Rdata")
  paste0(experimentDataPath, fileName)
}

# Estimates for every (delta, beta) combination: one file per combination plus
# one file with all combinations stacked.
allEstimates <- NULL
for (delta in deltaSeq) {
  for (beta in betaSeq) {
    # the population file is expected to bring `feedback` and `response` into
    # the workspace; both are used and then removed again below
    load(GetPopulationPath(delta, beta))
    # number of distinct consumers per producer
    producerDegree <- feedback %>%
      group_by(producer) %>%
      summarise(degree = n_distinct(consumer)) %>%
      ungroup()
    # reference value: mean noise-free response per treatment
    trueVal <- response %>%
      group_by(treatment) %>%
      summarise(trueAvgResponse = mean(noiseFree)) %>%
      ungroup()
    treatmentLevels <- unique(trueVal$treatment)
    rm(feedback, response)

    # one block of estimates per experiment, stacked row-wise; producerInTreatment
    # is used as an index into treatmentLevels to recover the treatment label
    # (assumes it is a position in that vector -- confirm in GetEstimate)
    estimate <- do.call("rbind", lapply(1:nExperiments, function(experimentId) {
      GetEstimate(experimentId = experimentId, GetExperimentDataPath,
                  producerDegree, beta, delta,
                  nBootIter, nCores)
    })) %>%
      rename(treatment = producerInTreatment) %>%
      mutate(treatment = treatmentLevels[treatment]) %>%
      left_join(trueVal, by = "treatment")

    filename <- paste0(estimatePath, "allEstimates_delta=", delta, "_beta=", beta, ".Rdata")
    save(estimate, file = filename)
    allEstimates <- rbind(allEstimates, estimate)
  }
}

save(allEstimates, file = paste0(estimatePath, "allEstimates.Rdata"))

################################################################################################################################
################################################### Cluster Based Estimation ###################################################
################################################################################################################################
# the design of experiment does not depend on delta or beta
# loading population data to get modelParametersWithTreatment
# Load one population file; the code below expects it to define
# `modelParametersWithTreatment` in the workspace.
load(populationDataPath[[1]])
# Wide format: one treatmentLevelExposure column per treatment value.
networkWithTreatments <- modelParametersWithTreatment %>%
  select(consumer, producer, edgeStrength, baseLevelExposure, treatmentLevelExposure, treatment) %>%
  spread(key = treatment, value = treatmentLevelExposure)
treatmentSet <- as.vector(unique(modelParametersWithTreatment$treatment))

stopifnot(is.numeric(nExperiments), nExperiments >= 1)
clusterExperimentIds <- seq_len(nExperiments)

# Build all cluster-based designs in parallel. The raw results are checked for
# "try-error" objects, which is how mclapply() reports worker failures;
# previously they were discarded unseen.
system.time(clusterDesignResults <- mclapply(clusterExperimentIds, function(i)
  GetClusterExperiment(networkWithTreatments, treatmentSet, experimentId = i,
                       outputPath = clusterBasedDesignPath, seedForDesign),
  mc.cores = nCores))
failedClusterDesigns <- vapply(clusterDesignResults, inherits, logical(1), what = "try-error")
if (any(failedClusterDesigns)) {
  stop("GetClusterExperiment() failed in ", sum(failedClusterDesigns), " of ",
       length(clusterDesignResults), " parallel job(s); first error: ",
       as.character(clusterDesignResults[[which(failedClusterDesigns)[1]]]), call. = FALSE)
}
# Per-experiment design files (file names as GetClusterExperiment() is assumed
# to write them -- confirm there).
clusterDesignPath <- paste0(dataDir, "clusterExperimentSetup/clusterDesign_ID=",
                            clusterExperimentIds, ".Rdata")

# generate experiment data for the cluster-based designs
system.time(clusterExperimentDataResults <- mclapply(clusterExperimentIds, function(i)
  GenerateExperimentData(i, clusterDesignPath, deltaSeq, betaSeq,
                         outputPath = clusterBasedExperimentDataPath, seedForExperimentData),
  mc.cores = nCores))
failedClusterExperimentData <- vapply(clusterExperimentDataResults, inherits, logical(1),
                                      what = "try-error")
if (any(failedClusterExperimentData)) {
  stop("GenerateExperimentData() failed in ", sum(failedClusterExperimentData), " of ",
       length(clusterExperimentDataResults), " parallel job(s); first error: ",
       as.character(clusterExperimentDataResults[[which(failedClusterExperimentData)[1]]]),
       call. = FALSE)
}
experimentData <- unlist(clusterExperimentDataResults)

# Path of the saved cluster-based experiment data for a given experiment id and
# (delta, beta) pair, located in the global directory
# `clusterBasedExperimentDataPath`.
GetClusterExperimentDataPath <- function(experimentId, delta, beta) {
  fileName <- paste0("experiment=", experimentId, "_delta=", delta, "_beta=", beta, ".Rdata")
  paste0(clusterBasedExperimentDataPath, fileName)
}

# Cluster-based estimates for every (delta, beta) combination: one file per
# combination plus one file with all combinations stacked.
allClusterBasedEstimates <- NULL
for (delta in deltaSeq) {
  for (beta in betaSeq) {
    # one block of estimates per experiment, stacked row-wise
    clusterBasedEstimate <- do.call("rbind", lapply(1:nExperiments, function(experimentId) {
      GetClusterBasedEstimate(experimentId = experimentId, GetClusterExperimentDataPath,
                              beta, delta,
                              nBootIter, nCores)
    })) %>%
      rename(treatment = producerInTreatment)

    filename <- paste0(clusterBasedEstimatePath, "allClusterBasedEstimates_delta=", delta, "_beta=", beta, ".Rdata")
    save(clusterBasedEstimate, file = filename)
    allClusterBasedEstimates <- rbind(allClusterBasedEstimates, clusterBasedEstimate)
  }
}

save(allClusterBasedEstimates, file = paste0(clusterBasedEstimatePath, "allClusterBasedEstimates.Rdata"))

################################################################################################################################
####################################################### Plots ######################################################
################################################################################################################################
# Population plots: a 2 x 2 panel written to <plotsPath>network.pdf with the
# degree histogram, degree per node, and the histograms of edge strength and
# base level exposure.
pdf(file = paste0(plotsPath, "network.pdf"), width = 12, height = 12)
par(mfrow = c(2, 2))
nodeDegree <- degree(network, V(network))
hist(nodeDegree, breaks = 100, xlab = "degree", ylab = "count", main = "")
plot(V(network), nodeDegree, xlab = "memberId", ylab = "degree")
hist(modelParameters$edgeStrength, breaks = 100, xlab = "edge strength", ylab = "count", main = "")
hist(modelParameters$baseLevelExposure, breaks = 100, xlab = "base level exposure", ylab = "count", main = "")
dev.off()
# per-population plots, one parallel job per saved population file
mclapply(
  populationDataPath,
  function(populationFile) MakePopulationLevelPlots(populationFile, plotsPath),
  mc.cores = nCores
)
