% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pivotal.R
\name{piv_sel}
\alias{piv_sel}
\title{Pivotal Selection via Co-Association Matrix}
\usage{
piv_sel(C, clusters)
}
\arguments{
\item{C}{A \eqn{N \times N} co-association matrix, i.e.
a matrix whose elements are co-occurrences of pairs of units
in the same cluster among \eqn{H} distinct partitions.}

\item{clusters}{A vector of integers indicating
a partition of the \eqn{N} units into, say, \eqn{k} groups.}
}
\value{
\item{\code{pivots}}{ A matrix with \eqn{k} rows and three
columns containing the indexes of the pivotal units for each method.}
}
\description{
Finding the pivots according to three different
methods involving a co-association matrix C.
}
\details{
Given a set of \eqn{N} observations \eqn{(y_{1},y_{2},...,y_{N})}
(\eqn{y_i} may be a \eqn{d}-dimensional vector, \eqn{d \geq 1}),
consider clustering methods to obtain \eqn{H} distinct partitions
into \eqn{k} groups.
The matrix \code{C} is the co-association matrix,
where \eqn{c_{i,p}=n_{i,p}/H}, with \eqn{n_{i,p}} the number of times
the pair \eqn{(y_{i},y_{p})} is assigned to the same
cluster among the \eqn{H} partitions.

Let \eqn{j} be the group containing units \eqn{\mathcal J_j};
the user may choose \eqn{{i^*}\in\mathcal J_j} that
maximizes one of the quantities:
\deqn{
 \sum_{p\in\mathcal J_j} c_{{i^*}p}}

 or
 \deqn{\sum_{p\in\mathcal J_j} c_{{i^*}p} - \sum_{p\not\in\mathcal J_j} c_{{i^*}p}.
}

These methods give the unit that maximizes the global
within similarity (\code{"maxsumint"}) and the unit that
maximizes the difference between global within and
between similarities (\code{"maxsumdiff"}), respectively.
Alternatively, we may choose \eqn{i^{*} \in\mathcal J_j}, which minimizes:
\deqn{\sum_{p\not\in\mathcal J_j} c_{i^{*}p},}
obtaining the unit of the group that is most distant from
the units of all the other groups, i.e. the one that minimizes
the global similarity between one group
and all the others (\code{"minsumnoint"}).
See the vignette for further details.
}
\examples{
# Iris data: use the four numeric measurements as features

data(iris)
x <- iris[, 1:4]
N <- nrow(iris)
H <- 1000
a <- matrix(NA, H, N)

# Perform H k-means partitions (row h of a stores the h-th partition)

for (h in seq_len(H)) {
  a[h, ] <- kmeans(x, centers = 3)$cluster
}

# Build the co-association matrix: C[i, j] is the proportion of the
# H partitions in which units i and j are assigned to the same cluster

C <- matrix(1, N, N)
for (i in seq_len(N - 1)) {
  for (j in (i + 1):N) {
    C[i, j] <- sum(a[, i] == a[, j]) / H
    C[j, i] <- C[i, j]
  }
}

km <- kmeans(x, centers = 3)

# Find the pivots according to the three possible pivotal criteria

ris <- piv_sel(C, clusters = km$cluster)

plot(iris[, 1], iris[, 2], xlab = "Sepal.Length", ylab = "Sepal.Width",
     col = km$cluster)

# Add the pivots according to maxsumdiff criterion
# (use the full element name `pivots` rather than relying on
# partial matching of `$`)

points(x[ris$pivots[, 3], c("Sepal.Length", "Sepal.Width")], col = 1:3,
       cex = 2, pch = 8)

}
\references{
Egidi, L., Pappadà, R., Pauli, F. and Torelli, N. (2018). Relabelling in Bayesian Mixture
Models by Pivotal Units. Statistics and Computing, 28(4), 957-969.
}
\author{
Leonardo Egidi \email{legidi@units.it}
}
