\encoding{latin1}

\name{seqsubm}

\alias{seqsubm}

\title{Create a substitution-cost matrix}

\description{
The substitution-cost matrix is used when computing distances between sequences with the optimal matching method. This function creates the substitution-cost matrix either from a constant value or from the transition rates computed from the sequence data. Other methods may be implemented in the future.
}

\details{
	The substitution-cost matrix has dimension \eqn{ns \times ns}{ns x ns}, where \eqn{ns} is the number of states in the \link{alphabet} of the sequence object. The element \eqn{(i,j)} of the matrix is the cost of substituting state \eqn{i} with state \eqn{j}.
	In the "constant" method, the substitution costs are the same for all the states, with a default value of 2. An alternative value can be provided by the user. When the "transition rates" method is chosen, the transition rates between all states are computed using the \link{seqtrate} function. The substitution cost between states \eqn{i} and \eqn{j} is obtained with the formula
\deqn{SC(i,j) = 2 - P(i,j) - P(j,i)}
where \eqn{P(i,j)} is the transition rate between states \eqn{i} and \eqn{j}.  
}
\usage{
 seqsubm(seqdata, method, cval=2, 
	with.miss=FALSE, miss.cost=2)
}
\arguments{
  \item{seqdata}{a sequence object as returned by the \link{seqdef} function.}

  \item{method}{method used to compute the substitution costs. At this time, the available methods are a constant value (\code{method="CONSTANT"}) or substitution costs derived from the transition rates (\code{method="TRATE"}).}
  \item{cval}{the constant substitution cost if method \code{"CONSTANT"} is chosen. Otherwise, do not specify.}
	
	\item{with.miss}{if \code{TRUE}, an additional entry is added in the matrix for the missing states. Hence, a new "missing" state is added to the list of "valid" states. Use this if you want to compute distances with missing values inside the sequences. See \cite{Gabadinho, 2008} for more details on the options for handling missing values when computing distances between sequences.}

	\item{miss.cost}{the substitution cost for the missing state.}
}
\seealso{
 \code{\link{seqtrate}}, \code{\link{seqdef}}, \code{\link{seqdist}}.
}

\references{Gabadinho, A., G. Ritschard, M. Studer and N. S. Mueller (2008). Mining Sequence Data in \code{R} with \code{TraMineR}: A user's guide. \emph{Department of Econometrics and Laboratory of Demography, University of Geneva}.
}

\examples{
## Defining a sequence object with columns 10 to 25 
## in the 'biofam' example data set 
data(biofam)
biofam.seq <- seqdef(biofam,10:25)

## Optimal matching using transition rates based substitution-cost matrix
## and insertion/deletion costs of 3
trcost <- seqsubm(biofam.seq, method="TRATE")
biofam.om <- seqdist(biofam.seq,method="OM",indel=3,sm=trcost)

## Optimal matching using constant value (2) substitution-cost matrix
## and insertion/deletion costs of 3
ccost <- seqsubm(biofam.seq, method="CONSTANT", cval=2)
biofam.om.c2 <- seqdist(biofam.seq, method="OM",indel=3,sm=ccost)

## Displaying the distance matrix for the first 10 sequences
biofam.om.c2[1:10,1:10]
}
\keyword{misc}
