#' Identify markers of each cluster
#' @description This function first identify marker genes in each cluster
#' with Roc threshold > RocThr. Then, based on marker genes identified above,
#' this function calculates the difference and power of marker genes in each
#' cluster, and marker genes with Difference threshold > DiffThr will be retained.
#' Next, gene with the largest power in which cluster will be the marker
#' gene in this cluster. Eventually, make fisher test for power of each cluster,
#' cluster with p.value < 0.05 will be retained as the final cluster for marker gene
#'
#' @param Seurat_object Seurat object, should contain cluster information
#' @param PowerCutoff numeric, indicating the cutoff of gene power to refine marker genes
#' @param DifferenceCutoff numeric, indicating the cutoff of difference in marker genes between
#' clusters to refine marker genes
#' @param PvalueCutoff numeric, indicating the p.value cutoff of chi-square test to refine marker genes
#' @importFrom Seurat FindAllMarkers
#' @importFrom Seurat GetAssayData
#' @importFrom stats fisher.test
#' @importFrom stats chisq.test
#'
#' @return Data frame of conserved markers
#' @export
#'
#' @examples data("pbmc_small")
#' all.markers <- Identify_Markers(pbmc_small)
Identify_Markers<-function(Seurat_object, PowerCutoff=0.4,
                           DifferenceCutoff=0,PvalueCutoff=0.05 ){
  validInput(Seurat_object,'seurat_object','seuratobject')
  validInput(PowerCutoff,'PowerCutoff','numeric')
  validInput(DifferenceCutoff,'DifferenceCutoff','numeric')
  validInput(PvalueCutoff,'PvalueCutoff','numeric')

  MarkerRoc<-Identify_Markers1(Seurat_object,PowerCutoff,DifferenceCutoff)
  MarkerRoc<-as.data.frame(MarkerRoc)
  Marker<-Identify_Markers2(Seurat_object,MarkerRoc,PowerThr1=DifferenceCutoff)
  final_Markers<-Refine_Markers(Seurat_object,Marker,p.value = PvalueCutoff)
  final_Markers <- final_Markers[,c(1:4,ncol(final_Markers),5:(ncol(final_Markers)-1))]
  colnames(final_Markers)[2] <- 'Allpower'
  return(final_Markers)
}


#' Format marker genes for plotting
#' @description Order the gene expression in each cluster to make the heatmap
#' look better
#' @param Marker_genes data.frame, generated by \code{\link{Identify_Markers}}
#' @export
#' @return Markers corresponding to certain cluster
#'
#' @examples data("pbmc_small")
#' all.markers <- Identify_Markers(pbmc_small)
#' all.markers2 <- Format_Markers_Frac(all.markers)
Format_Markers_Frac<-function(Marker_genes){
  RNA1 <- Marker_genes
  Cluster1 <- t(apply(RNA1, 1, function(x1){
    x21 <- strsplit(x1[1],',')[[1]]; x22 <- strsplit(x1[2],',')[[1]]
    x31 <- paste(x21[1:(length(x21)-1)], collapse=',')
    x32 <- x22[1]
    return(c(x21[1], x31, x32))
  }))
  colnames(Cluster1) <- c('cluster','Allcluster','power')
  RNA2 <- cbind(Cluster1, RNA1[,5:ncol(RNA1)])
  RNA3 <- RNA2[order(RNA2[, 'power'], decreasing=T), ]
  RNA3 <- RNA3[order(RNA3[, 'Allcluster']), ]
  RNA3 <- RNA3[order(RNA3[, 'cluster']), ]
  RNA3$gene <- rownames(RNA3)
  RNA3 <- RNA3[,c(ncol(RNA3),(1:ncol(RNA3)-1))]
  return(RNA3)
}


Identify_Markers1<-function(Seurat_object, PowerThr1=1/3,diffthr1){
  Marker0 <- Seurat::FindAllMarkers(object = Seurat_object, test.use ='roc', return.thresh = PowerThr1)
  Marker1 <- as.data.frame(Marker0)
  Marker2 <- Marker1[Marker1[,'avg_diff'] > diffthr1 & Marker1[,'power']>PowerThr1,]
  uMarker2 <- unique(Marker2[,'gene'])
  MarkerRoc1 <- Cal_MarkersRoc(Seurat_object, uMarker2)
  MarkerRoc2 <- Select_MarkersPower2(MarkerRoc1)
  return(MarkerRoc2)
}




Select_MarkersPower <- function(MarkerRoc01){

  MarkerRoc02 <- t(Revise_MarkersPower(MarkerRoc01))
  colnames(MarkerRoc02) <- gsub('_power','',colnames(MarkerRoc02))
  Power02 <- c()
  for(i in 1:dim(MarkerRoc02)[1]){
    MarkerRoc03 <- t(as.matrix(MarkerRoc02[i,seq(3,dim(MarkerRoc02)[2],3)]))
    Power01 <- Sort_MarkersPower(MarkerRoc03,0)
    Power02 <- rbind(Power02,Power01)
  }
  rownames(Power02) <- rownames(MarkerRoc02)
  colnames(Power02) <- c('Cluster','Power','Diff')

  return(Power02)
}

Select_MarkersPower2 <- function(MarkerRoc01){

  MarkerRoc02 <- t(Revise_MarkersPower(MarkerRoc01))
  colnames(MarkerRoc02) <- gsub('_power','',colnames(MarkerRoc02))
  Power02 <- c()
  for(i in 1:dim(MarkerRoc02)[1]){
    MarkerRoc03 <- t(as.matrix(MarkerRoc02[i,seq(3,dim(MarkerRoc02)[2],3)]))
    Power01 <- Sort_MarkersPower2(MarkerRoc03,0)
    Power02 <- rbind(Power02,Power01)
  }
  rownames(Power02) <- rownames(MarkerRoc02)
  colnames(Power02) <- c('Cluster','Power','Diff')

  return(Power02)
}


Revise_MarkersPower <- function(MarkerRoc01){
  MarkerRoc02 <- apply(MarkerRoc01,1,function(x01){
    for(i0 in seq(2,length(x01),3)){
      if(x01[i0]<0){ x01[i0+1] <- -x01[i0+1] }
    }
    return(x01)
  } )

  return(MarkerRoc02)
}



Sort_MarkersPower2 <- function(MarkerRoc01,Thr01=0){

  MarkerRoc02 <- sort.int(as.numeric(MarkerRoc01),index.return=T,decreasing=T)
  Name01 <- colnames(MarkerRoc01)
  if(length(MarkerRoc01)==0|max(MarkerRoc02$x)<=Thr01){
    MarkerRoc03 <- c('', '', '');
  }else if(length(MarkerRoc02$x)==1 & length(MarkerRoc02$x[MarkerRoc02$x>Thr01])==1 |
           length(MarkerRoc02$x)!=1 & length(MarkerRoc02$x[MarkerRoc02$x>Thr01])==1 & MarkerRoc02$x[2]<0){
    MarkerRoc03 <- c(paste(Name01[MarkerRoc02$ix[1]],paste0('Ctr',Thr01),sep=','), paste(MarkerRoc02$x[1],Thr01,sep=','), MarkerRoc02$x[1]);
  }else{
    if(length(MarkerRoc02$x)!=1 & length(MarkerRoc02$x[MarkerRoc02$x>Thr01])==1){
      MarkerRoc02x <- MarkerRoc02$x[1:2]
      MarkerRoc02ix <- MarkerRoc02$ix[1:2]
    }else{
      MarkerRoc02x <- c(MarkerRoc02$x[MarkerRoc02$x>Thr01],Thr01)
      MarkerRoc02ix <- c(MarkerRoc02$ix[MarkerRoc02$x>Thr01],length(MarkerRoc02$ix)+1)
      Name01 <- c(Name01,paste0('Ctr',Thr01))
    }
    Power01 <- Cal_RelativePower(MarkerRoc02x)
    maxPower01 <- which.max(Power01)
    if(maxPower01==length(MarkerRoc02x)){ maxPower01=length(Power01)-1; }

    MarkerRoc03 <- c(paste(Name01[MarkerRoc02ix[1:(maxPower01+1)]],collapse=','), paste(MarkerRoc02x[1:(maxPower01+1)],collapse=','), max(Power01));
  }

  return(MarkerRoc03)
}


Identify_Markers2 <- function(pbmc, Marker, PowerThr1=1/3){

  TotoalCluster <- length(unique(pbmc@active.ident))
  MarkerRoc1 <- Marker
  NumCluster <- apply(MarkerRoc1,1,function(X1){length(strsplit(as.character(X1[1]),',')[[1]])})
  MarkerRoc2 <- cbind(MarkerRoc1,NumCluster)
  MarkerRoc3 <- MarkerRoc2[MarkerRoc2$Diff>PowerThr1 & MarkerRoc2$NumCluster>1 & MarkerRoc2$NumCluster<=(ceiling(TotoalCluster/4)+1),]
  Cluster <- apply(MarkerRoc3, 1, function(X1){strsplit(X1[1],',')[[1]][1]})
  colnames(MarkerRoc3)[1]='AllCluster'
  MarkerRoc3$Cluster <- Cluster
  MarkerRoc3$gene <- rownames(MarkerRoc3)
  MarkerRoc3 <- MarkerRoc3[,c(1,2,4,5)]
  return(MarkerRoc3)
}





Refine_Markers<-function(Seurat_object,Marker,p.value = 0.05){
  MarkerRoc3 <- Identify_Markers3(Seurat_object, Marker)
  MarkerRoc4 <- MarkerRoc3[MarkerRoc3[,'Pvalue'] < 0.05,]
  print(c(nrow(MarkerRoc3), nrow(MarkerRoc4)))
  return(MarkerRoc4)
}



Identify_Markers3 <- function(pbmc, MarkerRoc2){

  pbmc@assays$RNA@data <- GetAssayData(pbmc)[rownames(MarkerRoc2), ]
  Frac1 <- Get_scRNA_AggExp(pbmc)
  colnames(Frac1) <- gsub('_0', '', colnames(Frac1))
  colnames(Frac1) <- gsub('FracC', 'Cluster', colnames(Frac1))
  MarkerRoc3 <- cbind(MarkerRoc2, Frac1)
  AllCluster1 <- as.matrix(table(pbmc@active.ident))
  rownames(AllCluster1) <- paste0('Cluster', rownames(AllCluster1))

  MarkerRoc31 <- apply(MarkerRoc3, 1, function(x1){
    x11 <- strsplit(x1[1], ',')[[1]]
    x12 <- x11[1:(length(x11)-1)]; x13 <- setdiff(rownames(AllCluster1), x12)
    TNum11 <- as.numeric(AllCluster1[match(x12, rownames(AllCluster1)),1])
    TNum12 <- as.numeric(AllCluster1[match(x13, rownames(AllCluster1)),1])
    Frac11 <- as.numeric(x1[match(x12, names(x1))]); mFrac11 <- which.min(Frac11)
    Frac12 <- as.numeric(x1[match(x13, names(x1))]); mFrac12 <- which.max(Frac12)
    TNum21 <- TNum11[mFrac11]; TNum22 <- TNum12[mFrac12]
    Frac21 <- Frac11[mFrac11]; Frac22 <- Frac12[mFrac12]
    TNum31 <- round(TNum21*Frac21); TNum32 <- round(TNum22*Frac22)

    Num4 <- matrix(c(TNum31, TNum21-TNum31, TNum32, TNum22-TNum32), nrow=2)
    if (FALSE %in% (c(TNum31, TNum21-TNum31, TNum32, TNum22-TNum32)>5)) {
      Test1 <- fisher.test(Num4)
    }else{Test1 <- chisq.test(Num4)}
    return(Test1$p.value)

  })
  MarkerRoc4 <- cbind(MarkerRoc3, MarkerRoc31)
  colnames(MarkerRoc4)[ncol(MarkerRoc4)] <- 'Pvalue'

  return(MarkerRoc4)
}




Get_scRNA_AggExp <- function(pbmc){

  Cluster1 <- 'idents'
  ExpType1 <- 'fraction'
  Group1 <- 'all'
  pbmc@meta.data[,'all'] <- 0
  pbmc@meta.data$idents <- pbmc@active.ident
  Meta01 <- pbmc@meta.data
  Cluster2 <- as.character(sort(unique(Meta01[, Cluster1[1]])))
  Group2 <- sort(unique(Meta01[, Group1[1]]))

  Name01 <- c(); flag01 <- 0;
  for(i0 in 1:length(Cluster2)){
    for(j0 in 1:length(Group2)){
      Cell01 <- Meta01[Meta01[, Cluster1[1]]==Cluster2[i0] & Meta01[, Group1[1]]==Group2[j0], ]
      Object02 <- as.matrix(GetAssayData(pbmc)[, rownames(Cell01)]); print(c(Cluster2[i0], as.character(Group2[j0]), ncol(Object02)))

      if(ncol(Object02)==0){
      }else{
        Frac1 <- t(apply(Object02, 1, function(x1){
          x12 <- length(x1[x1!=0])
          x2 <- x12/length(x1)
          return(c(x12, x2))
        }))
        if(is.element('expression', ExpType1) & is.element('fraction', ExpType1)){ mObject03 <- cbind(rowMeans(Object02), Frac1[,2])
        Name01 <- c(Name01, paste0(c('Exp','Frac'),paste0('C',Cluster2[i0],'_',Group2[j0])))
        }else if(is.element('expression', ExpType1)){ mObject03 <- rowMeans(Object02)
        Name01 <- c(Name01, paste0('ExpC',Cluster2[i0],'_',Group2[j0]))
        }else if(is.element('fraction', ExpType1)){ mObject03 <- Frac1[,2]
        Name01 <- c(Name01, paste0('FracC',Cluster2[i0],'_',Group2[j0]))
        }else if(is.element('number', ExpType1)){ mObject03 <- Frac1[,1]
        Name01 <- c(Name01, paste0('NumC',Cluster2[i0],'_',Group2[j0]))
        }else{ stop(paste0('Please choose ExpType1 for expression or fraction')) }

        if(flag01==0){ mObject04 <- mObject03; flag01 <- 1;
        }else{ mObject04 <- cbind(mObject04, mObject03) }
      } } }
  mObject04 <- as.matrix(mObject04); colnames(mObject04) <- Name01
  return(mObject04)
}
