#' Read file in report format
#' 
#' This function reads the content of a reporting file (a file in the model
#' intercomparison file format *.mif) into a list of MAgPIE objects or a single
#' MAgPIE object.
#'
#' 
#' @param file file name the object should be read from.
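#' @param as.list if TRUE (default) a list is returned; if FALSE the function
#' tries to merge all information into one MAgPIE object (still under
#' development and currently only works if the entries for the different models
#' and scenarios have exactly the same regions and years).
#' @return If \code{as.list=TRUE}, a nested list of MAgPIE objects with
#' scenarios on the first level and models on the second level. If
#' \code{as.list=FALSE}, a single MAgPIE object whose third dimension is named
#' "scenario.model.variable".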
#' 
#' @details 
#' 
#' The \strong{Model Intercomparison File Format (MIF)} is the default file
#' format for data produced by Integrated Assessment Models. It is based on the
#' common format used for Model Intercomparison Projects such as EMF and SSP
#' with some slight changes/clarifications in its definition. For interactions
#' between models this format should be used. For everything else it is at least
#' recommended to use this format, too.
#'
#' Aim of this standardization is to achieve a more flexible and smooth
#' communication between models and to facilitate the creation of aggregated
#' outputs from integrated assessment scenario runs which then can easily be
#' uploaded to external databases such as the EMF or SSP database. By using this
#' standard most of the required decisions for a working input output interface
#' between models have already been specified which significantly reduces the
#' required work to get a new interaction running.
#'
#' \strong{Definition}
#' 
#' The format is characterized by the following features:
#'   
#' \itemize{
#'   \item The file ending is ".mif"
#'   \item The file is written in ASCII format
#'   \item Entries are separated with ";", every line ends with a ";"
#'   \item The file always contains a header
#'   \item The format of the header is: \code{Model;Scenario;Region;Variable;Unit;<ADDITIONAL_COLUMNS>;<YEARS>;}
#'}
#'  
#' The first 5 entries always have to exist, <ADDITIONAL_COLUMNS> is additional
#' information which can be added optionally (e.g. "Description") and <YEARS>
#' are the years for which data is delivered. <YEARS> are always written as 4
#' digit numbers. In the (very unlikely) case that a year before 1000 is used
#' the number has to start with a 0, e.g. 0950. <ADDITIONAL_COLUMNS> can be
#' anything, there are no further rules at the moment what it can contain.
#' However, there are strict rules for naming these columns. Allowed are single
#' names starting with a capital letter without special characters in it except
#' "_" which is allowed. Examples: "Description" allowed, "More Description" not
#' allowed, "More_Description" allowed, "123Description" not allowed,
#' "Description123" allowed. Scripts using this format must be able to ignore
#' additional columns. For years there are no specific limitations/requirements
#' which years should be reported. Scripts dealing with this data must be able
#' to work with different temporal resolutions. For variables basically
#' everything can be reported here. Missing values have to be marked with "N/A".
#' 
#' @author Jan Philipp Dietrich
#' @seealso \code{\link{write.report}}
#' @examples
#' 
#' \dontrun{
#'   read.report("report.csv")
#' }
#' 
#' @export read.report
#' @importFrom utils read.table
#' 
read.report <- function(file,as.list=TRUE) {

  # Remove leading and trailing blanks (spaces only) after coercing to character.
  .trim <- function(a) gsub("(^ +)|( +$)", "", as.character(a))

  # Convert the rows belonging to one model/scenario combination into a MAgPIE
  # object with the dimensions region x year x "variable (unit)".
  #   tmp      data.frame with the columns Region, Variable, Unit and the year
  #            columns (named "X" followed by four digits)
  #   scenario scenario name, only used in the duplicate warning
  #   model    model name, only used in the duplicate warning
  .return_magpie <- function(tmp, scenario, model) {

    # replace the degree Celsius sign in units by "K"
    tmp$Unit <- sub(pattern="\U{00B0}C", replacement = "K", x = tmp$Unit, useBytes = TRUE)

    regions <- unique(as.character(tmp$Region))
    names(regions) <- regions

    # year columns were prefixed with "X" when the table was read
    yearelems <- grep("^X[0-9]{4}$", dimnames(tmp)[[2]])
    years <- sub("X", "y", dimnames(tmp)[[2]][yearelems])

    # Build "<variable> (<unit>)" element names. The temporary marker allows
    # to extract the unit again even if the variable name contains brackets.
    varnames <- unique(paste(tmp$Variable, "#SPLITHERE# (", tmp$Unit, ")", sep = ""))
    units <- sub("^.*#SPLITHERE# \\((.*)\\)$", "\\1", varnames)
    # the names attribute keeps the unmodified "<variable> (<unit>)" key which
    # is used further below to look up the (possibly cleaned) element name
    names(varnames) <- sub("#SPLITHERE#", "", varnames)
    varnames <- sub("#SPLITHERE#", "", varnames)

    # delete dots if they are apparently not used as dimension separator
    ndots <- nchar(gsub("[^\\.]*", "", varnames))
    if (any(ndots != ndots[1])) varnames <- gsub("\\.", "", varnames)

    # replace the degree Celsius sign again after the sub function evaluation
    varnames        <- sub(pattern="\U{00B0}C", replacement = "K", x = varnames, useBytes = TRUE)
    names(varnames) <- sub(pattern="\U{00B0}C", replacement = "K", x = names(varnames), useBytes = TRUE)

    # regions are sorted alphabetically with "GLO" forced to the end
    mag <- new.magpie(sub("ZZZZZZGLO", "GLO", (sort(sub("GLO", "ZZZZZZGLO", regions)))), years, varnames)
    # map the original region names to the region names used in the object
    regions[order(sub("GLO", "ZZZZZZGLO", regions))] <- dimnames(mag)[[1]]
    mag <- as.array(mag)

    # one coordinate row (region, year, variable) per value in the year columns;
    # the year columns are traversed column-wise, so every year is repeated
    # once per data row while regions and variables are recycled
    coord <- cbind(regions[tmp$Region],
                   rep(years, each = dim(tmp)[1]),
                   varnames[paste(tmp$Variable, " (", tmp$Unit, ")", sep = "")])

    # Duplicates are only searched for if there are more values than cells
    # in the target array (in that case duplicates must exist).
    if (dim(coord)[1] > length(mag)) {
      duplicates <- duplicated(as.data.table(coord))
      warning("Duplicate entries found for model \"",model,"\" and scenario \"",scenario,"\" and only the last entry found in the data will be used (duplicate entries: ",paste(apply(rbind(NULL,unique(coord[duplicates,c(1,3)])),1,paste,collapse="|"),collapse=", "),")!")
    }

    # non-numeric entries silently become NA
    mag[coord] <- suppressWarnings(as.numeric(as.vector(as.matrix(tmp[,yearelems]))))
    names(dimnames(mag)) <- c("region", "year", "variable")
    mag <- as.magpie(mag, spatial = 1, temporal = 2)
    if (withMetadata()) getMetadata(mag, "unit") <- install_magpie_units(units)
    return(mag)
  }

  # Read a single report file and return a nested list
  # (scenario -> model -> MAgPIE object).
  .readmif <- function(file) {
    default_header <- c("Model","Scenario","Region","Variable","Unit","X2005",
                        "X2010","X2020","X2030","X2040","X2050","X2060","X2070",
                        "X2080","X2090","X2100")

    # determine separator: if splitting the first line at ";" yields just a
    # single column the file is assumed to be comma separated
    s <- read.table(file, sep = ";", header = FALSE, nrows = 1, stringsAsFactors = FALSE)
    sep <- if (all(names(s) == "V1")) "," else ";"

    # recognize header (%in% instead of == so that a first cell which is
    # read as NA is treated as "no header" instead of propagating NA)
    s <- read.table(file, sep = sep, header = FALSE, nrows = 1, stringsAsFactors = FALSE)
    first_cell <- .trim(s[, 1])
    header <- first_cell %in% c("Model", "MODEL")

    # Read in raw data. The warning about incomplete lines is intercepted
    # right here, as warnings() only reports warnings of previous top-level
    # calls and can therefore not be used to check the current read.
    raw <- withCallingHandlers(
      read.table(file, sep = sep, header = header, stringsAsFactors = FALSE, na.strings = "N/A"),
      warning = function(w) {
        if (grepl("number of items read is not a multiple of the number of columns",
                  conditionMessage(w), fixed = TRUE)) {
          stop("Inconsistent input data! At least one line is incomplete!")
        }
      }
    )

    # a separator at the end of every line creates an empty last column
    ugly_format <- all(is.na(raw[, dim(raw)[2]]))
    if (ugly_format) raw <- raw[, -dim(raw)[2]]

    # rename from uppercase to lowercase
    if (header && first_cell == "MODEL") {
      names(raw)[1:5] <- default_header[1:5]
    }

    # without header the file must follow the default column layout
    if (!header) {
      if (dim(raw)[2] != length(default_header)) {
        stop("Cannot read report. No header given and report has not the standard size!")
      }
      dimnames(raw)[[2]] <- default_header
    }

    raw$Scenario <- .trim(raw$Scenario)
    raw$Model    <- .trim(raw$Model)
    raw$Region   <- .trim(raw$Region)
    raw$Unit     <- .trim(raw$Unit)
    raw$Variable <- .trim(raw$Variable)

    # missing model or scenario names are treated as the name "NA"
    raw$Model[is.na(raw$Model)] <- "NA"
    raw$Scenario[is.na(raw$Scenario)] <- "NA"

    # harmonize region names
    raw$Region <- sub("R5\\.2", "", raw$Region)
    raw$Region <- sub("World|glob", "GLO", raw$Region)

    models <- unique(raw$Model)
    scenarios <- unique(raw$Scenario)
    output <- list()
    for (scenario in scenarios) {
      output[[scenario]] <- list()
      for (model in models) {
        selection <- raw[raw$Model == model & raw$Scenario == scenario, ]
        if (nrow(selection) > 0) {
          mag <- .return_magpie(selection, scenario, model)
          # prefix the names so that they stay unique when everything is merged
          if (!as.list) getNames(mag) <- paste(scenario, model, getNames(mag), sep = ".")
          output[[scenario]][[model]] <- mag
        }
      }
    }
    return(output)
  }

  # expand wildcards
  file_name_unexpanded <- file
  file <- Sys.glob(file)
  if (length(file) == 0) {
    stop("File ",file_name_unexpanded," could not be found!")
  } else if (length(file) > 1) {
    # concatenate the scenario lists of all files
    output <- do.call(c, lapply(file, .readmif))
  } else {
    output <- .readmif(file)
  }

  if (!as.list) {
    objects <- unlist(output, recursive = FALSE)
    # make sure that magpie objects to be merged share the same regions
    regions <- Reduce(union, lapply(objects, getRegions))
    output <- mbind(lapply(objects, function(source) {
      data <- new.magpie(regions, getYears(source), getNames(source), fill = NA)
      data[getRegions(source), getYears(source), getNames(source)] <- source[getRegions(source), getYears(source), getNames(source)]
      return(data)
    }))
    names(dimnames(output))[3] <- "scenario.model.variable"
  }
  return(output)
}













