# Author:    Lerato E. Magosi
# R version: 3.1.0 (2014-04-10)
# Platform:  x86_64-apple-darwin10.8.0 (64-bit)
# Date:      18Jul2018


# Goal: Compute SPRE (Standardized Predicted Random Effects) statistics to identify overly
#       influential outlier studies in genetic association meta-analyses.


# Required libraries ---------------------------
# metafor            # for performing fixed and random-effects meta-analysis and estimating SPREs
# utils              # needed for the following functions: str, head, tail


# Calling globalVariables on the following variables to address 
# the note: "no visible binding for global variable" generated by "R CMD check"
utils::globalVariables(c("xb", "tau2", "xbse", "rawresid", "uncondse", "str"))


# Function: compute_spre_statistics 
#
#
# parameters: 
#
#
#   beta_in                   (numeric)   vector of effect-sizes, 
#   se_in                     (numeric)   vector of standard errors genomically corrected at study-level, 
#   study_names_in            (character) vector of study names, 
#   variant_names_in          (character) vector of variant names,
#   tau2_method               (character) method to estimate heterogeneity: either "DL" or "REML",
#
#
# returns: a list containing:
#
#
#   number_variants            (numeric),
#   number_studies             (numeric),           
# 	spre_dataset               (dataframe),
#
# ------------------------------------------------------------------------------------




compute_spre_statistics <- function(beta_in, se_in, study_names_in, variant_names_in, 
						            tau2_method = "DL", verbose_output = FALSE) {




	# Assemble dataset

	beta <- base::as.numeric(beta_in)
	se <- base::as.numeric(se_in)
	study_names <- base::factor(study_names_in)
	variant_names <- base::factor(variant_names_in)

	m <- base::data.frame(beta, se, variant_names, study_names)

    # View dataset structure
    if (verbose_output) utils::str(m)
    
    # View first six lines of dataset
    if (verbose_output) utils::head(m)

    # ---------------------------
    
    
    # Assign study numbers
	m$study <- m$study_names
	base::levels(m$study) <- base::seq_along(base::levels(m$study))

    # Calculate no. of studies
    nstudies <- base::nlevels(m$study_names)
    
    # Assign snp/variant numbers
    m$snp <- m$variant_names
    base::levels(m$snp) <- base::seq_along(base::levels(m$snp))

    # Calculate no. of variants
    nsnps <- base::nlevels(m$variant_names)

    
    # Print numbers of studies and snps
	if (verbose_output) {

		base::writeLines("\n")
		base::print(base::paste("Summary: This heterogeneity analysis is based on ", nsnps, " SNP(s) and ", nstudies, " studies"))
		base::print("***************** end of part 1: assign snp and study numbers	 ****************")
		base::writeLines("")
		
		}


	# -----------------------------------



    # Define function to align study effects i.e. betas
    align_betas <- function(dframe_current_snp) {
    
        if (tau2_method == "DL") {

			metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = T, knha = T, method = tau2_method)

			if (metafor_res$b[1] < 0) {
		
				if (verbose_output) { base::print(base::paste0("Aligning study effects in variant: ", base::unique(dframe_current_snp[, "variant_names"]))) }
		
				dframe_current_snp$beta <- -dframe_current_snp$beta
				
			}                        
        }
        
        else {

			metafor_res <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = T, knha = T, method = tau2_method, control=list(stepadj=0.5, maxiter=10000))

			if (metafor_res$b[1] < 0) {
		
				if (verbose_output) { base::print(base::paste0("Aligning study effects in variant: ", base::unique(dframe_current_snp[, "variant_names"]))) }
		
				dframe_current_snp$beta <- -dframe_current_snp$beta
				
			}        
        }
    

        dframe_current_snp
        
    }
 
 
    # Split dataset by snp to obtain mini-datasets for each snp, then align study effects i.e. betas 
    list_snp_minidatasets <- base::split(m, m$variant_names)
    align_betas_results <- base::lapply(list_snp_minidatasets, align_betas)

	if (verbose_output) base::print("***************** end of part 2: align study effects	 ****************")

	# -----------------------------------

    
    
    
    # Define function to extract standardized shrunken residuals i.e. usta aka SPREs
    
    
    metafor_weighted_least_squares_rand_effects_regression <- function(dframe_current_snp) {
    
        # Run random effects meta-analysis (inverse variance weighted least sq regression) on current snp
        
        if (tau2_method == "DL") {

			metafor_results <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = T, knha = T, method = tau2_method)        
        } 
        else {
        
			metafor_results <- metafor::rma.uni(yi = dframe_current_snp[, "beta"], sei = dframe_current_snp[, "se"], weighted = T, knha = T, method = tau2_method, control=list(stepadj=0.5, maxiter=10000))        
        }
        
        
        # Compute predicted values for the metafor_results
        metafor_results_predict <- metafor::predict.rma(metafor_results, digits = 8)

		# Warning note: Although the prediction's including random effects were similar i.e. 
		# (Empirical Bayes estimates in metafor and xbu in Stata's metareg) the values of 
		# their corresponding standard errors were quite different.

        
        # Compute Best Linear Unbiased Predictions i.e. Blups for the metafor_results
        metafor_results_blup <- metafor::blup.rma.uni(metafor_results, digits = 8)
        
        # Compute outlier diagnostics
        metafor_results_influence <- metafor::influence.rma.uni(metafor_results)
        
        # Extract hat values i.e. leverage a.k.a diagonals of hat matrix
        metafor_results_influence_hat <- (metafor_results_influence$inf)$hat
 
        # Print metafor results
 		if (verbose_output) {
		
		    base::writeLines("")
            base::print(paste("Random-effects meta-analysis for variant: ", base::unique(dframe_current_snp$variant_names)))
            base::writeLines("")
            base::print(paste("snp_no: ", base::unique(dframe_current_snp$snp)))
            base::writeLines("")

            base::print("metafor_results: ")
		    base::print(metafor_results)
		    base::writeLines("")
		    
		    }
       
        
        # Create a data.frame comprising results to return 
        output <- base::data.frame(
            dframe_current_snp,
            tau2 = metafor_results$tau2,           # estimate of between-study variance
            I2 = metafor_results$I2,               # Higgins inconsistency metric
            Q = metafor_results$QE,                # Q-statistic 
            xb = metafor_results_predict$pred,     # predicition excluding random effects
            xbse = metafor_results_predict$se,     # standard error of prediction excluding random effects
            xbu = metafor_results_blup$pred,       # predictions including random effects
            stdxbu = metafor_results_blup$se,      # corresponding std. error of Empirical Bayes estimates in metafor
            hat = metafor_results_influence_hat,   # leverage a.k.a diagonals of hat matrix
            row.names = base::with(dframe_current_snp, base::interaction(variant_names, study_names)))

        output
            
    }
    

 
    metafor_weighted_least_squares_rand_effects_regression_results <- base::lapply(align_betas_results, metafor_weighted_least_squares_rand_effects_regression)

    # View structure 
	if (verbose_output) {

		base::writeLines("\n")
		base::writeLines("Dataset structure for individual variants: ")
		base::writeLines("")
		utils::str(metafor_weighted_least_squares_rand_effects_regression_results)
		base::writeLines("")
		
		}
    
    # Append snp mini-datasets into a single file
    meta_analysis_results <- base::data.frame(base::do.call(rbind, metafor_weighted_least_squares_rand_effects_regression_results))


    # Compute raw residuals, unconditional standard error and ustas (a.k.a spres: standardized predicted random effects)
    # notes: R. M. Harbord and J. P. T. Higgins, pg. 517 sbe23, Stata journal
	# Reference: Meta-regression in Stata, The Stata Journal (2008) 8, Number 4, pp. 493 to 519

    meta_analysis_results_incl_spres <- meta_analysis_results
    
    meta_analysis_results_incl_spres$rawresid <- base::with(meta_analysis_results_incl_spres, beta - xb)
    meta_analysis_results_incl_spres$uncondse <- base::with(meta_analysis_results_incl_spres, base::sqrt(se**2 + tau2 - xbse**2))
    meta_analysis_results_incl_spres$spre <- base::with(meta_analysis_results_incl_spres, rawresid / uncondse)

		
    # View structure 
	if (verbose_output) {

		base::writeLines("\n")
		base::writeLines("Structure for dataset including SPRE statistics: ")
		base::writeLines("")
	
		utils::str(meta_analysis_results_incl_spres)
		base::writeLines("")

		}

	if (verbose_output) base::print("***************** end of part 3: compute SPRE statistics	 ****************")

	# -----------------------------------
	
	
	
	# List of items to return
	base::list(number_variants = nsnps,
	           number_studies = nstudies, 
	           spre_dataset = meta_analysis_results_incl_spres)
	    

}

