% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MBRM.R
\name{MBRM}
\alias{MBRM}
\title{Morisita-Based Filter for Redundancy Minimization}
\usage{
MBRM(X, scaleQ, m=2, C=NULL, ID_tot=NULL)
}
\arguments{
\item{X}{A \eqn{N \times E}{N x E} \code{matrix}, \code{data.frame} or \code{data.table}
where \eqn{N} is the number of data points and \eqn{E} is the number of variables (or features).
Each variable is rescaled to the \eqn{[0,1]} interval by the function.}

\item{scaleQ}{A vector containing the values of \eqn{\ell^{-1}}{l^(-1)}
chosen by the user (see Details).}

\item{m}{The value of the parameter m (by default: \code{m=2}).}

\item{C}{The number of steps of the Sequential Forward Selection (SFS) procedure
(by default: \code{C = E}, i.e. the number of variables).}

\item{ID_tot}{The value of the full data ID if it is known a priori (by default:
the value of \code{ID_tot} is estimated using the Morisita estimator of ID within
the function).}
}
\value{
A list of four elements:
 \enumerate{
 \item a vector containing the identifier numbers of the original features in
 the order they are selected through the Sequential Forward Selection (SFS)
 search procedure.
 \item the names of the corresponding features.
 \item the corresponding ID estimates.
 \item the ID estimate of the full data set.
 }
}
\description{
Executes the MBRM algorithm for unsupervised feature selection.
}
\details{
\enumerate{
 \item \eqn{\ell}{l} is the edge length of the grid cells (or quadrats). Since the variables
 (and consequently the grid) are rescaled to the \eqn{[0,1]} interval, \eqn{\ell}{l} is equal
 to \eqn{1} for a grid consisting of only one cell.
 \item \eqn{\ell^{-1}}{l^(-1)} is the number of grid cells (or quadrats) along each axis of the
 Euclidean space in which the data points are embedded.
 \item \eqn{\ell^{-1}}{l^(-1)} is equal to \eqn{Q^{(1/E)}}{Q^(1/E)} where \eqn{Q} is the number
 of grid cells and \eqn{E} is the number of variables (or features).
 \item \eqn{\ell^{-1}}{l^(-1)} is directly related to \eqn{\delta}{delta} (see References).
 \item \eqn{\delta}{delta} is the diagonal length of the grid cells.
 \item The values of \eqn{\ell^{-1}}{l^(-1)} in \code{scaleQ} must be chosen according to the linear
 part of the \eqn{\log}{log}-\eqn{\log}{log} plot relating the \eqn{\log}{log} values of the
 multipoint Morisita index to the \eqn{\log}{log} values of \eqn{\delta}{delta} (or,
 equivalently, to the \eqn{\log}{log} values of \eqn{\ell^{-1}}{l^(-1)}) (see \code{logMINDEX}).
}
}
\examples{
# Build the example data set (see the Butterfly help page for the meaning
# of its argument).
bf <- Butterfly(10000)

# Run MBRM on every column except the 9th, with scaleQ = 5:25, i.e. the
# values of l^(-1) described in the Details section.
bf_select <- MBRM(bf[,-9], 5:25)
# Element 2 of the result: feature names in the order they were selected.
var_order <- bf_select[[2]]
# Element 3 of the result: the corresponding ID estimates.
var_perf  <- bf_select[[3]]

# Plot the ID estimate obtained at each step of the selection.
dev.new(width=5, height=4)
plot(var_perf,type="b",pch=16,lwd=2,xaxt="n",xlab="", ylab="",
     col="red",ylim=c(0,max(var_perf)),panel.first={grid(lwd=1.5)})
# The default x axis is suppressed above (xaxt="n"); label each step with the
# name of the feature added at that step. seq_along() is used instead of
# 1:length() so the tick positions stay valid even for an empty vector.
axis(1,seq_along(var_order),labels=var_order)
mtext(1,text="Added Features (from left to right)",line=2.5,cex=1)
mtext(2,text="Estimated ID",line=2.5,cex=1)
}
\references{
J. Golay and M. Kanevski (2017). Unsupervised feature selection based on the
Morisita estimator of intrinsic dimension,
\href{http://www.sciencedirect.com/science/article/pii/S0950705117303659}{Knowledge-Based Systems 135:125-134}.
}
\author{
Jean Golay \email{Jean.Golay@unil.ch}
}
