% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/snpClust.R
\name{snpClust}
\alias{snpClust}
\title{Adjacency-constrained Clustering of Single Nucleotide Polymorphisms}
\usage{
snpClust(x, h = ncol(x) - 1, stats = c("R.squared", "D.prime"))
}
\arguments{
\item{x}{either a genotype matrix of class
\code{\link[snpStats:SnpMatrix-class]{SnpMatrix}}/\code{\link{matrix}} or a
linkage disequilibrium matrix of class
\code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}. In the latter case the LD
values are expected to be in [0,1]}

\item{h}{band width. If not provided, \code{h} is set to the default value
\code{p-1}, where \code{p} is the number of columns of \code{x}}

\item{stats}{a character vector specifying the linkage disequilibrium
measures to be calculated (using the \code{\link[snpStats:ld]{ld}}
function) when \code{x} is a genotype matrix. Only "R.squared" and
"D.prime" are allowed, see Details.}
}
\value{
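An object of class \code{\link{chac}} when no LD value is missing. If
genotypes are given as input and the corresponding LD matrix has missing
entries, the indices of the SNP pairs which yield missing values are returned
instead (see Details).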
}
\description{
Adjacency-constrained hierarchical agglomerative clustering of Single
Nucleotide Polymorphisms based on Linkage Disequilibrium
}
\details{
Adjacency-constrained hierarchical agglomerative clustering (HAC) is HAC in
which each observation is associated with a position, and the clustering is
constrained so that only adjacent clusters are merged. SNPs are clustered
based on their similarity as measured by the linkage disequilibrium.

In the special case where genotypes are given as input and the corresponding
LD matrix has missing entries, the clustering cannot be performed. This can
typically happen when there is insufficient variability in the sample
genotypes. In this special case, the indices of the SNP pairs which yield
missing values are returned.

If \code{x} is of class
  \code{\link[snpStats:SnpMatrix-class]{SnpMatrix}} or \code{\link{matrix}},
  it is assumed to be an \eqn{n \times p} matrix of \eqn{p} genotypes for
  \eqn{n} individuals. This input is converted to an LD similarity matrix
  using the \code{snpStats::ld} function. If \code{x} is of class
  \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, it is assumed to be a
  (squared) LD matrix.

  Clustering on an LD similarity other than "R.squared" or "D.prime" can be
  performed by providing the LD values directly as argument \code{x}. These
  values are expected to be in [0,1], otherwise they are truncated to [0,1].
}
\examples{
## minimal walk-through of the three input types accepted by snpClust()
if (requireNamespace("snpStats", quietly = TRUE)) {
  data(testdata, package = "snpStats")

  # small subset of the genotype data shared by all calls below
  geno <- Autosomes[1:200, 1:5]

  # 1. genotypes passed as a snpStats::SnpMatrix
  fit1 <- snpClust(geno, h = 3, stats = "R.squared")

  # 2. the same genotypes passed as a base::matrix
  fit2 <- snpClust(as.matrix(geno), h = 3, stats = "R.squared")

  # 3. LD values computed beforehand and passed as a Matrix::dgCMatrix
  ldres <- snpStats::ld(geno, depth = 3, stats = "R.squared", symmetric = TRUE)
  fit3 <- snpClust(ldres, h = 3)
}

}
\references{
Dehman A. (2015) \emph{Spatial Clustering of Linkage
  Disequilibrium Blocks for Genome-Wide Association Studies}, PhD thesis,
  Universite Paris Saclay.

Dehman, A., Ambroise, C. and Neuvial, P. (2015). Performance of a
  blockwise approach in variable selection using linkage disequilibrium
  information. \emph{BMC Bioinformatics} 16:148.

Ambroise C., Dehman A., Neuvial P., Rigaill G., and Vialaneix N.
  (2019). \emph{Adjacency-constrained hierarchical clustering of a band
  similarity matrix with application to genomics}, Algorithms for Molecular
  Biology 14(22).
}
\seealso{
\code{\link{adjClust}} \code{\link[snpStats:ld]{ld}}
}
