% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/seqaddNA.R
\name{seqaddNA}
\alias{seqaddNA}
\title{Generation of missing data in longitudinal categorical data}
\usage{
seqaddNA(
  data,
  var = NULL,
  states.high = NULL,
  propdata = 1,
  pstart.high = 0.1,
  pstart.low = 0.005,
  pcont = 0.66,
  maxgap = 3,
  maxprop = 0.75,
  only.traj = FALSE
)
}
\arguments{
\item{data}{A data frame containing sequences of a categorical (multinomial)
variable, where missing data are coded as \code{NA}.}

\item{var}{A vector specifying the columns of the dataset
that contain the trajectories. Default is \code{NULL}, meaning all columns
are used.}

\item{states.high}{A list of states with a higher probability of
initiating a subsequent missing data gap.}

\item{propdata}{Proportion of trajectories for which missing data
is simulated, as a decimal between 0 and 1.}

\item{pstart.high}{Probability of starting a missing data gap for the
states specified in the \code{states.high} argument.}

\item{pstart.low}{Probability of starting a missing data gap for all
other states.}

\item{pcont}{Probability that a missing data gap continues.}

\item{maxgap}{Maximum length of a missing data gap.}

\item{maxprop}{Maximum proportion of missing data allowed in a sequence,
as a decimal between 0 and 1.}

\item{only.traj}{Logical, if \code{TRUE}, only the
trajectories (specified in \code{var}) are returned. If \code{FALSE},
the entire data frame is returned.}
}
\value{
A data frame with simulated missing data.
}
\description{
Generation of missing data in sequences based on a Markovian
approach.
}
\details{
The first time point of a trajectory has a \code{pstart.low} probability to
be missing. For the next time points, the probability to be missing depends
on the previous time point. There are four cases:

1. If the previous time point is missing and the maximum length of a
missing gap, which is specified by the argument \code{maxgap}, is reached,
the time point is set as observed.

2. If the previous time point is missing, but the maximum length of a gap is
not reached, there is a \code{pcont} probability that this time point is missing.

3. If the previous time point is observed and its state belongs
to the list of states specified by \code{states.high}, the probability to
be missing is \code{pstart.high}.

4. If the previous time point is observed but its state does not
belong to the list of states specified by \code{states.high}, the
probability to be missing is \code{pstart.low}.

If the proportion of missing data in a given trajectory exceeds the
proportion specified by \code{maxprop}, the missing data simulation is
repeated for the sequence.
}
\examples{
# Generate MCAR missing data on the mvad dataset
# from the TraMineR package

\dontrun{
data(mvad, package = "TraMineR")
mvad.miss <- seqaddNA(mvad, var = 17:86)


# Generate missing data on mvad where joblessness is more likely to trigger
# a missing data gap
mvad.miss2 <- seqaddNA(mvad, var = 17:86, states.high = "joblessness")
}

}
\author{
Kevin Emery
}
