% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/improvedktaucenters.R
\name{improvedktaucenters}
\alias{improvedktaucenters}
\title{improvedktaucenters}
\usage{
improvedktaucenters(X, K, cutoff = 0.999, nstart = 5, INITcenters = NULL)
}
\arguments{
\item{X}{numeric matrix of size n x p.}

\item{K}{number of clusters.}

\item{cutoff}{argument for outlier detection - quantile of the chi-square
distribution to be used as the threshold for outlier detection, defaults to 0.999.}

\item{nstart}{number of trials for which the base ktaucenters routine is run at the
first stage. If it is greater than 1 and center is not set as NULL, a random set of
(distinct) rows in X is chosen as the initial centers for each trial.}

\item{INITcenters}{numeric matrix of size K x p giving the initial centers from
which the clusters and robust covariance matrices will be computed. If it is set as NULL,
the algorithm will compute them with the ktaucenters routine. Set to NULL by default.}
}
\value{
A list with the following components:
 \item{\code{centers}}{: Matrix of size K x p, with the estimated K centers.}
 \item{\code{cluster}}{: A vector of integers (from 1:K) indicating the cluster to
which each point is allocated.}
 \item{\code{sigmas}}{: A list containing the K covariance matrices found by the
procedure at its second step.}
 \item{\code{outliers}}{: Indices of the observations that can be considered as outliers.}
}
\description{
Robust clustering algorithm for non-spherical data. This function estimates
clusters taking into account that clusters may have
different sizes, volumes or orientations.
}
\examples{

# Generate synthetic data: three normal clusters in two dimensions.
# The clusters have different shapes and orientations.
# The data is contaminated uniformly (level 20\%).

# Generate the base clusters: 300 standard normal points, 100 per cluster
set.seed(1)
Z1 <- c(rnorm(100, 0), rnorm(100, 0), rnorm(100, 0))
Z2 <- rnorm(300)
X <- matrix(0, ncol = 2, nrow = 300)
X[, 1] <- Z1
X[, 2] <- Z2
true.cluster <- c(rep(1, 100), rep(2, 100), rep(3, 100))

# Rotate, expand and translate the base clusters
theta <- pi/3
aux1 <- matrix(c(cos(theta), -sin(theta), sin(theta), cos(theta)), nrow = 2)
aux2 <- sqrt(4) * diag(c(1, 1/4))
# Cluster 3: scale with aux2, rotate with aux1, then shift every point by (5, 2)
X[true.cluster == 3, ] <-
  X[true.cluster == 3, ] \%*\% aux2 \%*\% aux1 + matrix(c(5, 2),
                                                  byrow = TRUE,
                                                  nrow = 100,
                                                  ncol = 2)
# Cluster 2: stretch the second coordinate
X[true.cluster == 2, 2] <- X[true.cluster == 2, 2] * 5
# Cluster 1: shrink the second coordinate, then shift every point by (-5, -1)
X[true.cluster == 1, 2] <- X[true.cluster == 1, 2] * 0.1
X[true.cluster == 1, ] <- X[true.cluster == 1, ] + matrix(c(-5, -1),
                                                          byrow = TRUE,
                                                          nrow = 100,
                                                          ncol = 2)

# Generate 60 synthetic outliers (contamination level 20\%)

outliers <- sample(1:300, 60)
n.outliers <- length(outliers)
# Draw one value per matrix cell (2 coordinates per outlier); a shorter vector
# would be silently recycled by matrix() and produce duplicated outliers.
X[outliers, ] <- matrix(runif(2 * n.outliers, 2 * min(X), 2 * max(X)),
                        ncol = 2, nrow = n.outliers)

# Applying the algorithm
robust <- improvedktaucenters(X, K = 3, cutoff = 0.999)

# Plotting results: true labels on top, estimated labels below.
# Outliers are drawn in black (col = 1) in both panels.
oldpar <- par(mfrow = c(2, 1))
plot(X, main = "Actual clusters")
for (j in 1:3){
 points(X[true.cluster == j, ], pch = 19, col = j + 1)
}
points(X[outliers, ], pch = 19, col = 1)
plot(X, main = "Clusters estimation")
for (j in 1:3){
 points(X[robust$cluster == j,], pch = 19, col = j + 1)
}
points(X[robust$outliers, ], pch = 19)

# Restore the graphical parameters changed above
par(oldpar)
}
\references{
Gonzalez, J. D., Yohai, V. J., & Zamar, R. H. (2019).
Robust Clustering Using Tau-Scales. arXiv preprint arXiv:1906.08198.
}
