#' Generate a virtual species distribution with a PCA of environmental variables
#' 
#' This functions generates a virtual species distribution by computing a
#' PCA among environmental variables, and simulating the response of the species
#' along the two first axes of the PCA. The response to axes of the PCA is 
#' determined with gaussian functions.
#' @param raster.stack a RasterStack object, in which each layer represent an environmental 
#' variable.
#' @param rescale \code{TRUE} of \code{FALSE}. Should the output suitability raster be
#' rescaled between 0 and 1?
#' @param niche.breadth \code{"any"}, \code{"narrow"} or \code{"wide"}. This parameter
#' defines how tolerant is the species regarding environmental conditions by adjusting
#' the standard deviations of the gaussian functions. See details.
#' @param means a vector containing two numeric values. Will be used to define
#' the means of the gaussian response functions to the axes of the PCA.
#' @param sds a vector containing two numeric values. Will be used to define
#' the standard deviations of the gaussian response functions to the axes of 
#' the PCA.
#' @param pca a \code{dudi.pca} object. You can provide a pca object that you 
#' computed yourself with \code{\link[ade4]{dudi.pca}}
#' @param sample.points \code{TRUE} of \code{FALSE}. If you have a large
#' raster file then use this parameter to sample a number of points equal to
#' \code{nb.points}.
#' @param nb.points a numeric value. Only useful if \code{sample.points = TRUE}.
#' The number of sampled points from the raster, to perform the PCA. A too small
#' value may not be representative of the environmental conditions in your raster.
#' @param plot \code{TRUE} or \code{FALSE}. If \code{TRUE}, the generated virtual species will be plotted.
#' @note
#' To perform the PCA, the function has to transform the raster into a matrix.
#' This may not be feasible if the raster is too large for the computer's memory.
#' In this case, you should perform the PCA on a sample of your raster with
#' set \code{sample.points = TRUE} and choose the number of points to sample with
#' \code{nb.points}. 
#' @details
#' This function proceeds in 3 steps:
#' \enumerate{
#' \item{A PCA of environmental conditions is generated}
#' \item{Gaussian responses to the first two axes are computed}
#' \item{These responses are multiplied to obtain the final environmental suitability}}
#' 
#' If \code{rescale = TRUE}, the final environmental suitability is rescaled between 0 and 1,
#' with the formula (val - min) / (max - min).
#' 
#' The shape of gaussian responses can be randomly generated by the function or defined manually by choosing
#' \code{means} and \code{sds}. The random generation is constrained
#' by the argument \code{niche.breadth}, which controls the range of possible 
#' standard deviation values. This range of values is based on
#' a fraction of the axis:
#' \itemize{
#' \item{\code{"any"}: the standard deviations can have values from 1\% to 50\% of axes' ranges. For example if the first axis of the PCA ranges from -5 to +5,
#' then sd values along this axe can range from 0.1 to 5.
#' }
#' \item{\code{"narrow"}: the standard deviations are limited between 1\% and 10\% of axes' ranges. For example if the first axis of the PCA ranges from -5 to +5,
#' then sd values along this axe can range from 0.1 to 1.
#' }
#' \item{\code{"wide"}: the standard deviations are limited between 10\% and 50\% of axes' ranges. For example if the first axis of the PCA ranges from -5 to +5,
#' then sd values along this axe can range from 1 to 5.
#' }
#' }
#' @import raster
#' @export
#' @author
#' Boris Leroy \email{leroy.boris@@gmail.com}
#' 
#' with help from C. N. Meynard, C. Bellard & F. Courchamp
#' @seealso \code{\link{generateSpFromFun}} to generate a virtual species with
#' the responses to each environmental variables.
#' #' @return a \code{list} with 3 elements:
#' \itemize{
#' \item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"pca"}}
#' \item{\code{details}: the details and parameters used to generate the species}
#' \item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the
#' environmental suitability}
#' }
#' The structure of the virtualspecies object can be seen using str()
#' @examples
#' # Create an example stack with four environmental variables
#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), 
#'             nrow = 100, ncol = 100, byrow = TRUE)
#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)),
#'              raster(a * 1:100),
#'              raster(a * logisticFun(1:100, alpha = 10, beta = 70)),
#'              raster(t(a)))
#' names(env) <- c("var1", "var2", "var3", "var4")
#' plot(env) # Illustration of the variables
#' 
#' 
#' 
#' 
#' 
#' # Generating a species with the PCA
#' 
#' generateSpFromPCA(raster.stack = env)
#' 
#' # The top part of the plot shows the PCA and the response functions along
#' # the two axes.
#' # The bottom part shows the probabilities of occurrence of the virtual
#' # species.
#' 
#' 
#' 
#' 
#' 
#' # Defining manually the response to axes
#' 
#' generateSpFromPCA(raster.stack = env,
#'            means = c(-2, 0),
#'            sds = c(0.6, 1.5))
#'            
#' # This species can be seen as occupying intermediate altitude ranges of a
#' # conic mountain.
#' 


generateSpFromPCA <- function(raster.stack, rescale = TRUE, niche.breadth = "any",
                              means = NULL, sds = NULL, pca = NULL,
                              sample.points = FALSE, nb.points = 10000,
                              plot = TRUE)
{
  if(!(is(raster.stack, "Raster")))
  {
    stop("raster.stack must be a raster stack object")
  }
  if(sample.points)
  {
    if(!is.numeric(nb.points))
    {stop("nb.points must be a numeric value corresponding to the number of pixels to sample from raster.stack")}
    env.df <- sampleRandom(raster.stack, size = nb.points, na.rm = TRUE)
  } else
  {
    if(canProcessInMemory(raster.stack, n = 2))
    {
      env.df <- getValues(raster.stack)
      if(any(is.na(env.df)))
      {
        env.df <- env.df[-unique(which(is.na(env.df), arr.ind = T)[, 1]), ] # Removing NAs 
      }
    } else
    {
      stop("Your computer does not have enough memory to extract all the values from raster.stack. 
           Use the argument sample.points = TRUE, and adjust the number of points to use with nb.points. More details in ?generateSpFromPCA")
    }
  }
  
  if(!is.null(pca))
  {
    if(!all(class(pca) %in% c("pca", "dudi"))) 
    {stop("Please provide an appropriate pca.object (output of dudi.pca()) to make the pca plot.")}
    if(any(!(names(pca$tab) %in% names(raster.stack))))
    {stop("The variables used to make the pca must be the same as variables names in raster.stack")}
        
    pca.object <- pca
    rm(pca)
    sel.vars <- names(raster.stack)
  } else
  {
    
    sel.vars <- names(raster.stack)
    
    raster.stack <- raster.stack[[sel.vars]]
    
    if(sample.points)
    {
      if(!is.numeric(nb.points))
      {stop("nb.points must be a numeric value corresponding to the number of pixels to sample from raster.stack")}
      env.df <- sampleRandom(raster.stack, size = nb.points, na.rm = TRUE)
    } else
    {
      env.df <- getValues(raster.stack)
      if(any(is.na(env.df)))
      {
        env.df <- env.df[-unique(which(is.na(env.df), arr.ind = T)[, 1]), ] # Removing NAs 
      }
    }
    
    
    message(" - Perfoming the pca\n")
    pca.object <- ade4::dudi.pca(env.df, scannf = F, nf = 2)
  }
  message(" - Defining the response of the species along PCA axes\n")
  
  if(!is.null(means))
  {
    if(!is.numeric(means))
    {stop("Please provide numeric means for the gaussian function to compute probabilities of presence")}
    if(!is.vector(means) | length(means) != 2)
    {stop("Please provide a vector with 2 means for the gaussian function (one for each of the two pca axes)")}
  } else
  {
    means <- pca.object$li[sample(1:nrow(pca.object$li), 1), ][1, ]
    means <- c(mean1 = means[1, 1],
               mean2 = means[1, 2])
  }
  
  
  if(!is.null(sds))
  {
    if(!is.numeric(sds))
    {stop("Please provide numeric standard deviations for the gaussian function to compute probabilities of presence")}
    if(!is.vector(sds) | length(sds) != 2)
    {stop("Please provide a vector with 2 standard deviations for the gaussian function (one for each of the two pca axes)")}
    if(any(sds < 0))
    {stop("The standard deviations must have a positive value!")}
    message("    - You have provided standard deviations, so argument niche.breadth will be ignored.\n")
  } else
  {
    # Defining a range of values to determine sds for the gaussian functions
    axis1 <- c(min = max(min(pca.object$li[, 1]),
                         quantile(pca.object$li[, 1], probs = .25) - 
                           5 * (quantile(pca.object$li[, 1], probs = .75) - 
                                  quantile(pca.object$li[, 1], probs = .25))),
               max = min(max(pca.object$li[, 1]),
                         quantile(pca.object$li[, 1], probs = .75) + 
                           5 * (quantile(pca.object$li[, 1], probs = .75) - 
                                  quantile(pca.object$li[, 1], probs = .25))))
    axis2 <- c(min = max(min(pca.object$li[, 2]),
                         quantile(pca.object$li[, 2], probs = .25) - 
                           5 * (quantile(pca.object$li[, 2], probs = .75) - 
                                  quantile(pca.object$li[, 2], probs = .25))),
               max = min(max(pca.object$li[, 2]),
                         quantile(pca.object$li[, 2], probs = .75) + 
                           5 * (quantile(pca.object$li[, 2], probs = .75) - 
                                  quantile(pca.object$li[, 2], probs = .25))))
    
    # Random sampling of parameters
    if(niche.breadth == "any")
    {
      sds <- c(sd1 = sample(seq((axis1[2] - axis1[1])/100, (axis1[2] - axis1[1])/2, length = 1000), 1),
               sd2 = sample(seq((axis2[2] - axis2[1])/100, (axis2[2] - axis2[1])/2, length = 1000), 1))
    } else if (niche.breadth == "narrow")
    {
      sds <- c(sd1 = sample(seq((axis1[2] - axis1[1])/100, (axis1[2] - axis1[1])/10, length = 1000), 1),
               sd2 = sample(seq((axis2[2] - axis2[1])/100, (axis2[2] - axis2[1])/10, length = 1000), 1))
    } else if (niche.breadth == "wide")
    {
      sds <- c(sd1 = sample(seq((axis1[2] - axis1[1])/10, (axis1[2] - axis1[1])/2, length = 1000), 1),
               sd2 = sample(seq((axis2[2] - axis2[1])/10, (axis2[2] - axis2[1])/2, length = 1000), 1))
    } else
    {
      stop("niche.breadth must be one of these: 'any', 'narrow', 'wide")
    }
  }
  
 
  message(" - Calculating suitability values\n")
  pca.env <- calc(raster.stack[[sel.vars]], fun = function(x, ...)
    {.pca.coordinates(x, pca = pca.object, na.rm = TRUE)})
  suitab.raster <- calc(pca.env, fun = function(x, ...){.prob.gaussian(x, means = means, sds = sds)})
  if(rescale)
  {
    suitab.raster <- (suitab.raster - suitab.raster@data@min) / (suitab.raster@data@max - suitab.raster@data@min)
  }

  
  if(plot)
  {
    if(!("null device" %in% names(dev.cur()))) dev.off()
    par(mar = c(5.1, 4.1, 4.1, 2.1))
    layout(matrix(nrow = 2, ncol = 1, c(1, 2)))

    plotResponse(x = raster.stack, approach = "pca",
                 parameters = list(pca = pca.object,
                                   means = means,
                                   sds = sds))
    
    image(suitab.raster, axes = T, ann = F, asp = 1, 
          main = "Environmental suitability of the virtual species",
          las = 1, col = rev(terrain.colors(12)))
    
    
    legend(title = "Pixel\nsuitability", "right", inset = c(-0.1, 0),
           legend = c(1, 0.8, 0.6, 0.4, 0.2, 0),
           fill = terrain.colors(6), bty = "n")
    title("Environmental suitability of the virtual species")
  }
  
  results <- list(approach = "pca",
                  details = list(variables = sel.vars,
                                 pca = pca.object,
                                 rescale = rescale,
                                 axes = c(1,2), # Will be changed later if the choice of axes is implemented
                                 means = means,
                                 sds = sds),
                  suitab.raster = suitab.raster)
  class(results) <- append(class(results), "virtualspecies")
  return(results)
}

