#' @title Robust testing in GLMs, by sign-flipping score contributions
#'
#' @description Provides robust tests for testing in GLMs, by sign-flipping score contributions. The tests are often robust against overdispersion, heteroscedasticity and, in some cases, ignored nuisance variables.
#' @param formula see \code{glm} function. It can also be a model (usually generated by a call to \code{glm}); in this case, any other glm-related parameter (e.g. \code{family, data, etc.}) are discarded, the function will make use of the ones used to generate the model. 
#' (i.e. \code{formula}, \code{family}, \code{data}, etc) are not considered.  It is \code{NULL} by default (i.e. not used). 
#' @param family see \code{glm} function.
#' @param data see \code{glm} function.
#' @param score_type The type of score that is computed. It can be "standardized" "orthogonalized", "effective" or "basic". 
#' Both "orthogonalized" and "effective" take into account the nuisance estimation and they provide the same
#' test statistic. In case of small samples "effective score" might have a slight anti-conservative behaviour. 
#' "standardized effective score" gives a solution for this issue.
#' "orthogonalized" has a similar intent, note however that in case of a big model matrix, it may be slow.
#' @param n_flips The number of random flips of the score contributions. Overwritten with the \code{nrow(flips)} when \code{flips} is not \code{NULL} (see parameter \code{flips} for more details).
#' When \code{n_flips} is equal or larger than the maximum number of possible flips (i.e. n^2), all possible flips are performed.
#' @param alternative It can be "greater", "less" or "two.sided" (default)
#' @param id a \code{vector} identifying the clustered observations. If \code{NULL} (default) observations are assumed to be independent. If \code{id} is not \code{NULL}, only \code{score_type=="effective"} is allowed, yet.
#' @param seed \code{NULL} by default. 
#' @param to_be_tested vector of indices or names of coefficients of the glm model to be tested (it is faster than computing every scores and p-values of course).
#' @param flips matrix fo +1 or -1, the matrix has \code{n_flips} rows and n (number of observations) columns
#' @param precompute_flips \code{TRUE} by default. Overwritten if \code{flips} is not \code{NULL}. If \code{FALSE} the matrix of flips is not computed and the flips are made 'on-the-fly' before computing the test statistics; it may be usefull when \code{flips} is very large (see parameter \code{flips} for more details).
#' @param output_flips \code{FALSE} by default. If \code{TRUE} the \code{flips} matrix is returned. Useful when the same flips are needed for more glms, for example in the case of multivariate glms where the joint distribution of test statistis if used for multivariate inference. 
#' @param ... see \code{glm} function.
#' 
#'
#' @usage flipscores(formula, family, data, score_type = "standardized",
#' n_flips = 5000, alternative = "two.sided", id = NULL,
#' seed = NULL, to_be_tested = NULL, flips = NULL,
#' precompute_flips = TRUE, output_flips = FALSE, ...)
#'
#' @return an object of class \code{flipscores}.
#' See also its methods (\code{summary.flipscores}, \code{anova.flipscores}, \code{print.flipscores}). 
#'
#' @details \code{flipscores} borrows the same parameters from function \code{glm} (and \code{glm.nb}). See these helps for more details about parameters such as \code{formula},
#' \code{data}, \code{family}. Note: in order to use Negative Binomial family, \code{family} reference must have quotes (i.e. \code{family="negbinom"}). 
#'  Furthermore, \code{flipscores} object contains two extra elements: \code{scores}  -- i.e. a matrix of n score contributions, one column for each tested coefficient -- and \code{Tspace} -- i.e. a matrix of size \code{n_flips} times \code{ncol(scores)}. The fist row of  \code{Tspace} contains column-wise the test statistics generated by randomly flipping the score contributions, each column refers to the same column of \code{scores}, the vector of observed test statistics (i.e. no flips) is in the first row of \code{Tspace}.  
#'
#' @author Livio Finos, Riccardo De Santis, Jesse Hemerik and Jelle Goeman
#'
#' @seealso \code{\link{anova.flipscores}}, \code{\link{summary.flipscores}}, \code{\link[flip]{flip}}
#'
#' @name flipscores
#' 
#' @references "Robust testing in generalized linear models by sign-flipping score contributions" by J.Hemerik, J.Goeman and L.Finos.
#' 
#' @examples
#' set.seed(1)
#' dt=data.frame(X=rnorm(20),
#'    Z=factor(rep(LETTERS[1:3],length.out=20)))
#' dt$Y=rpois(n=20,lambda=exp(dt$Z=="C"))
#' mod=flipscores(Y~Z+X,data=dt,family="poisson",n_flips=1000)
#' summary(mod)
#' 
#' # Equivalent to:
#' model=glm(Y~Z+X,data=dt,family="poisson")
#' mod2=flipscores(model)
#' summary(mod2)
#' 
#' @export



flipscores<-function(formula, family, data,
                     score_type = "standardized",
                     n_flips=5000, 
                     alternative ="two.sided", 
                     id = NULL,
                     seed=NULL,
                     to_be_tested=NULL,
                     flips=NULL,
                     precompute_flips=TRUE,
                     output_flips=FALSE,
                     ...){
  # if(FALSE) flip() #just a trick to avoid warnings in package building
  # temp=is(formula) #just a trick to avoid warnings in package building
  # catturo la call,
  fs_call <- mf <- match.call()
  
  score_type=match.arg(score_type,c("orthogonalized","standardized","effective","basic","my_lab"))
  if(missing(score_type))
    stop("test type is not specified or recognized")
  
  
  # individuo i parametri specifici di flip score
  m <- match(c("score_type","n_flips","alternative","id","output_flips","seed","flips","precompute_flips"), names(mf), 0L)
  m <- m[m>0]
  flip_param_call= mf[c(1L,m)]
  
  # rinomino la funzione da chiamare:
  flip_param_call[[1L]]=.flip_test
  
  
  flip_param_call$id=eval(flip_param_call$id, parent.frame())
  flip_param_call$alternative=eval(flip_param_call$alternative, parent.frame())
  flip_param_call$flips <- eval(flip_param_call$flips, parent.frame())
  if(!is.null(flip_param_call$flips)) 
    flip_param_call$n_flips=nrow(flip_param_call$flips) else{
      flip_param_call$n_flips <- eval(flip_param_call$n_flips, parent.frame())
      if(is.null(flip_param_call$n_flips)) flip_param_call$n_flips=5000
    }
  
  if(is.null(flip_param_call$precompute_flips)) flip_param_call$precompute_flips=TRUE  
  flip_param_call$family <- eval(flip_param_call$family, parent.frame()) 
  flip_param_call$score_type <- eval(flip_param_call$score_type, parent.frame()) 
  if(is.null(flip_param_call$score_type)) flip_param_call$score_type = "standardized"
  flip_param_call$seed <- eval(flip_param_call$seed, parent.frame())
  flip_param_call$nobservations = eval(mf$nobservations, parent.frame())
  mf$nobservations=NULL
  
  m2 <- match(c("to_be_tested"), names(mf), 0L)
  if(m2==0)
    to_be_tested=NULL
  else{
    m <- c(m,m2)
    to_be_tested=mf[[m2]]
  }
  
  #####check id not null only with effective score:
  if(!is.null(flip_param_call$id)&&(score_type=="orthogonalized")){
    print(warning("WARNING: Use of id is not possible with score_type=='orthogonalized', yet. 
 Nothing done."))
    return(NULL)
  }
  
  # mi tengo solo quelli buoni per glm
  if(length(m)>0) mf <- mf[-m] 
  mf$offset = eval(mf$offset,parent.frame()) # most of the time it is not a model
  
  model = eval(mf$formula,parent.frame()) # most of the time it is not a model
  if("formula"%in%is(model)){ # usual input
    # if("model"%in%names(mf)){
    #   #compute H1 model
    #   model <- eval(mf$model, parent.frame())
    #   if(is.null(model[["x"]])){
    #     param_x_ORIGINAL=FALSE
    #     model=update(model,x=TRUE)
    #     } else param_x_ORIGINAL=TRUE
    #   
    # } else { #fit the glm or negbinom model
    #set the model to fit
    if(!is.null(mf$family)&&(mf$family=="negbinom")){
      mf[[1L]]=quote(glm.nb)
      mf$family=NULL
    } else{
      mf[[1L]]=quote(glm)
    }
    
    #compute H1 model
    param_x_ORIGINAL=mf$x
    mf$x=TRUE
    model <- eval(mf, parent.frame())
  } else { # input is a model
    param_x_ORIGINAL <- TRUE
    model <- update(model,x=TRUE)
  }
  
  if(is.null(model$y)) model$y=model$model[,1]
  
  #compute H0s models
  to_be_tested=eval(to_be_tested, parent.frame())
  if(is.null(to_be_tested))
    to_be_tested=colnames(model[["x"]]) else 
      {
        if(is.numeric(to_be_tested))
          to_be_tested = colnames(model[["x"]])[to_be_tested]

      to_be_tested=eval(to_be_tested,parent.frame())
      }
  
  if(!is.null(flip_param_call$flips)){
    flip_param_call$precompute_flips=FALSE
    flip_param_call$n_flips=nrow(eval(flip_param_call$flips,parent.frame()))
  } else if(flip_param_call$precompute_flips){
      set.seed(seed)
      flip_param_call$flips=.make_flips(max(nrow(model$model),flip_param_call$nobservations),
                                        flip_param_call$n_flips,flip_param_call$id)
    }  
  

#  if(is.null(flip_param_call$seed)) flip_param_call$seed=Sys.time() #eval(.Random.seed[1], envir=.GlobalEnv)
  results=lapply(to_be_tested,socket_compute_scores_and_flip,
                 model,
                 flip_param_call=flip_param_call#score_type=score_type,
                 # id=eval(flip_param_call$id, parent.frame()),
                 # alternative=flip_param_call$alternative,
                 # n_flips=flip_param_call$n_flips,
                 # seed=flip_param_call$seed
  )
  model$scores=data.frame(lapply(results,function(x)x[[1]]$scores))
  nrm=sapply(results,function(x)attributes(x[[1]]$scores)$scale_objects$nrm)
  std_dev=sapply(results,function(x)attributes(x[[1]]$scores)$sd)
  model$Tspace=data.frame(lapply(results,function(x)x[[1]]$Tspace)) 
  model$p.values=sapply(results,function(x)x[[1]]$p.values)
  if(!is.null(flip_param_call$output_flips)&&flip_param_call$output_flips)
    model$flips=flip_param_call$flips
  flip_param_call$flips=NULL
  attr(model$scores,"nrm")=nrm
  attr(model$scores,"sd")=std_dev
  attr(model$scores,"resid_std")=data.frame(lapply(results,function(x)attr(x[[1]]$scores,"resid_std"))) 
  names(attributes(model$scores)$resid_std) <-names(nrm) <- names(std_dev) <- names(model$scores)<- names(model$Tspace) <- names(model$p.values) <-to_be_tested
  
  
  ### output
  model$call=fs_call
  model$flip_param_call=flip_param_call
  # model$id=flip_param_call$id
  model$score_type=score_type
  #model$n_flips=n_flips
  
  
  if(is.null(param_x_ORIGINAL)||(!param_x_ORIGINAL)) model$x=NULL
  class(model) <- c("flipscores", class(model))
  return(model)
}
