% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_conversion.R
\name{list_diploid_params}
\alias{list_diploid_params}
\title{Collect essential data values before mixture proportion estimation}
\usage{
list_diploid_params(AC_list, I_list, PO, coll_N, RU_vec, RU_starts,
  alle_freq_prior = list(const_scaled = 1))
}
\arguments{
\item{AC_list}{a list of allele count matrices; output from \code{a_freq_list}}

\item{I_list}{a list of genotype vectors; output from \code{allelic_list}}

\item{PO}{a vector of collection (population of origin) indices
for every individual in the sample, in order identical to \code{I_list}}

\item{coll_N}{a vector of the total number of individuals in each collection,
in order of appearance in the dataset}

\item{RU_vec}{a vector of collection indices, sorted by reporting unit}

\item{RU_starts}{a vector of indices, designating the first collection for each
reporting unit in \code{RU_vec}}

\item{alle_freq_prior}{a one-element named list specifying the prior to be used when
generating Dirichlet parameters for genotype likelihood calculations. The name of the
list item determines the type of prior used, with options \code{"const"}, \code{"const_scaled"},
and \code{"empirical"}. If \code{"const"}, the listed number will be taken as a constant
added to the count for each allele, locus, and collection.
If \code{"const_scaled"}, the listed number will be divided by the number of alleles at a locus,
then added to the allele counts. If \code{"empirical"}, the listed number will be multiplied
by the relative frequency of each allele across all populations, then added to the allele counts.}
}
\value{
\code{list_diploid_params} returns a list of the information necessary
for the calculation of genotype likelihoods in MCMC:

\code{L}, \code{N}, and \code{C} represent the number of loci, individual genotypes,
and collections, respectively. \code{A} is a vector of the number of alleles at each
locus, and \code{CA} is the cumulative sum of \code{A}. \code{coll}, \code{coll_N},
\code{RU_vec}, and \code{RU_starts} are copied directly from input.

\code{I}, \code{AC}, \code{sum_AC}, \code{DP}, and \code{sum_DP} are vectorized
versions of data previously represented as lists and matrices; indexing macros
use \code{L}, \code{N}, \code{C}, \code{A}, and \code{CA} to access these vectors
in later Rcpp-based calculations.
}
\description{
Takes all relevant information created in previous steps of the data conversion pipeline
and combines it into a single list, which serves as input for further calculations.
}
\details{
Genotypes represented in \code{I_list} are converted into a single long vector,
ordered by locus, individual, and gene copy, with \code{NA} values represented as 0s.
Similarly, \code{AC_list} is unlisted to \code{AC}, ordered by locus, collection,
and allele. \code{DP} is a list of Dirichlet priors for likelihood calculations, created
by adding the values calculated from \code{alle_freq_prior} to each allele count.
\code{sum_AC} and \code{sum_DP} are the summed allele values for each locus
of their parent vectors, ordered by locus and collection.
}
\examples{
# Run the allelic_list example first; the objects used below (ale_long,
# ale_ac, ale_alle_list) are expected to be created by it.
example(allelic_list)

# Integer collection index for every individual, and the number of
# individuals observed in each collection.
PO <- as.integer(factor(ale_long$clean_short$collection))
coll_N <- as.vector(table(PO))

# One row per reporting unit and collection combination present in the data.
Colls_by_RU <- dplyr::count(ale_long$clean_short, repunit, collection) \%>\%
  dplyr::filter(n > 0) \%>\%
  dplyr::select(-n)

# Tally how many collections belong to each reporting unit.
# seq_len() keeps the loop from running when the table has no rows.
PC <- rep(0, length(unique(Colls_by_RU$repunit)))
for (i in seq_len(nrow(Colls_by_RU))) {
  PC[Colls_by_RU$repunit[i]] <- PC[Colls_by_RU$repunit[i]] + 1
}

# Cumulative collection counts, starting at 0, marking where each
# reporting unit begins in RU_vec.
RU_starts <- c(0, cumsum(PC))
RU_vec <- as.integer(Colls_by_RU$collection)

param_list <- list_diploid_params(ale_ac, ale_alle_list, PO, coll_N, RU_vec, RU_starts)

}
\keyword{internal}
