% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/genscore.R
\name{get_elts}
\alias{get_elts}
\title{The function wrapper to get the elements necessary for calculations for all settings.}
\usage{
get_elts(
  h_hp,
  x,
  setting,
  domain,
  centered = TRUE,
  profiled_if_noncenter = TRUE,
  scale = "",
  diagonal_multiplier = 1,
  use_C = TRUE,
  tol = .Machine$double.eps^0.5,
  unif_dist = NULL
)
}
\arguments{
\item{h_hp}{A function that returns a list containing \code{hx=h(x)} (element-wise) and \code{hpx=hp(x)} (element-wise derivative of \eqn{h}) when applied to a vector or a matrix \code{x}, both of which has the same shape as \code{x}.}

\item{x}{An \code{n} by \code{p} matrix, the data matrix, where \code{n} is the sample size and \code{p} the dimension.}

\item{setting}{A string that indicates the distribution type, must be one of \code{"exp"}, \code{"gamma"}, \code{"gaussian"}, \code{"log_log"}, \code{"log_log_sum0"}, or of the form \code{"ab_NUM1_NUM2"}, where \code{NUM1} is the \code{a} value and \code{NUM2} is the \code{b} value, and \code{NUM1} and \code{NUM2} must be integers or two integers separated by "/", e.g. "ab_2_2", "ab_2_5/4" or "ab_2/3_1/2". If \code{domain$type == "simplex"}, only \code{"log_log"} and \code{"log_log_sum0"} are supported, and on the other hand \code{"log_log_sum0"} is supported for \code{domain$type == "simplex"} only.}

\item{domain}{A list returned from \code{make_domain()} that represents the domain.}

\item{centered}{A boolean, whether in the centered setting(assume \eqn{\boldsymbol{\mu}=\boldsymbol{\eta}=0}{\mu=\eta=0}) or not. Default to \code{TRUE}.}

\item{profiled_if_noncenter}{A boolean, whether in the profiled setting (\eqn{\lambda_{\boldsymbol{\eta}}=0}{\lambda_\eta=0}) if non-centered. Parameter ignored if \code{centered=TRUE}. Default to \code{TRUE}. Can only be \code{FALSE} if \code{setting == "log_log_sum0" && centered == FALSE}.}

\item{scale}{A string indicating the scaling method. If contains \code{"sd"}, columns are scaled by standard deviation; if contains \code{"norm"}, columns are scaled by l2 norm; if contains \code{"center"} and \code{setting == "gaussian" && domain$type == "R"}, columns are centered to have mean zero. Default to \code{"norm"}.}

\item{diagonal_multiplier}{A number >= 1, the diagonal multiplier.}

\item{use_C}{Optional. A boolean, use C (\code{TRUE}) or R (\code{FALSE}) functions for computation. Default to \code{TRUE}. Ignored if \code{setting == "gaussian" && domain$type == "R"}.}

\item{tol}{Optional. A positive number. If \code{setting != "gaussian" || domain$type != "R"}, function stops if any entry if smaller than -tol, and all entries between -tol and 0 are set to tol, for numerical stability and to avoid violating the assumption that \eqn{h(\mathbf{x})>0}{h(x)>0} almost surely.}

\item{unif_dist}{Optional, defaults to \code{NULL}. If not \code{NULL}, \code{h_hp} must be \code{NULL} and \code{unif_dist(x)} must return a list containing \code{"g0"} of length \code{nrow(x)} and \code{"g0d"} of dimension \code{dim(x)}, representing the l2 distance and the gradient of the l2 distance to the boundary: the true l2 distance function to the boundary is used for all coordinates in place of h_of_dist; see "Estimating Density Models with Complex Truncation Boundaries" by Liu et al, 2019. That is, \eqn{(h_j\circ \phi_j)(x_i)}{(h_j\circ phi_j)(xi)} in the score-matching loss is replaced by \eqn{g_0(x_i)}{g0(xi)}, the l2 distance of xi to the boundary of the domain.}
}
\value{
A list that contains the elements necessary for estimation.
  \item{n}{The sample size.}
  \item{p}{The dimension.}
  \item{centered}{The centered setting or not. Same as input.}
  \item{scale}{The scaling method. Same as input.}
  \item{diagonal_multiplier}{The diagonal multiplier. Same as input.}
  \item{diagonals_with_multiplier}{A vector that contains the diagonal entries of \eqn{\boldsymbol{\Gamma}}{\Gamma} after applying the multiplier.}
  \item{domain_type}{The domain type. Same as domain$type in the input.}
  \item{setting}{The setting. Same as input.}
  \item{g_K}{The \eqn{\boldsymbol{g}}{g} vector. In the non-profiled non-centered setting, this is the \eqn{\boldsymbol{g}}{g} sub-vector corresponding to \eqn{\mathbf{K}}{K}. A \eqn{p^2}-vector. Not returned if \code{setting == "gaussian" && domain$type == "R"} since it is just \eqn{diag(p)}.}
  \item{Gamma_K}{The \eqn{\boldsymbol{\Gamma}}{Gamma} matrix with no diagonal multiplier. In the non-profiled non-centered setting, this is the \eqn{\boldsymbol{\Gamma}}{\Gamma} sub-matrix corresponding to \eqn{\mathbf{K}}{K}. A vector of length \eqn{p^2} if \code{setting == "gaussian" && domain$type == "R"} or \eqn{p^3} otherwise.}
  \item{g_eta}{Returned in the non-profiled non-centered setting. The \eqn{\boldsymbol{g}}{g} sub-vector corresponding to \eqn{\boldsymbol{\eta}}{\eta}. A \eqn{p}-vector. Not returned if \code{setting == "gaussian" && domain$type == "R"} since it is just \eqn{numeric(p)}.}
  \item{Gamma_K_eta}{Returned in the non-profiled non-centered setting. The \eqn{\boldsymbol{\Gamma}}{\Gamma} sub-matrix corresponding to interaction between \eqn{\mathbf{K}}{K} and \eqn{\boldsymbol{\eta}}{\eta}. If \code{setting == "gaussian" && domain$type == "R"}, returns a vector of length \eqn{p}, or \eqn{p^2} otherwise.}
  \item{Gamma_eta}{Returned in the non-profiled non-centered setting. The \eqn{\boldsymbol{\Gamma}}{\Gamma} sub-matrix corresponding to \eqn{\boldsymbol{\eta}}{\eta}. A \eqn{p}-vector. Not returned if \code{setting == "gaussian" && domain$type == "R"} since it is just \code{rep(1,p)}.}
  \item{t1,t2}{Returned in the profiled non-centered setting, where the \eqn{\boldsymbol{\eta}}{\eta} estimate can be retrieved from \eqn{\boldsymbol{t_1}-\boldsymbol{t_2}\hat{\mathbf{K}}}{t1-t2*\hat{K}} after appropriate resizing.}
If \code{domain$type == "simplex", the following are also returned.}
  \item{Gamma_K_jp}{A matrix of size \code{p} by \code{p(p-1)}. The \code{(j-1)*p+1} through \code{j*p} columns represent the interaction matrix between the \code{j}-th column and the \code{m}-th column of \code{K}.}
  \item{Gamma_Kj_etap}{Non-centered only. A matrix of size \code{p} by \code{p(p-1)}. The \code{j}-th column represents the interaction between the \code{j}-th column of \code{K} and \code{eta[p]}.}
  \item{Gamma_Kp_etaj}{Non-centered only. A matrix of size \code{p} by \code{p(p-1)}. The \code{j}-th column represents the interaction between the \code{p}-th column of \code{K} and \code{eta[j]}. Note that it is equal to \code{Gamma_Kj_etap} if \code{setting} does not contain substring \code{"sum0"}.}
  \item{Gamma_eta_jp}{Non-centered only. A vector of size \code{p-1}. The \code{j}-th component represents the interaction between \code{eta[j]} and \code{eta[p]}.}
}
\description{
The function wrapper to get the elements necessary for calculations for all settings.
}
\details{
Computes the \eqn{\boldsymbol{\Gamma}}{\Gamma} matrix and the \eqn{\boldsymbol{g}}{g} vector for generalized score matching.

Here, \eqn{\boldsymbol{\Gamma}}{\Gamma} is block-diagonal, and in the non-profiled non-centered setting, the \eqn{j}-th block is composed of \eqn{\boldsymbol{\Gamma}_{\mathbf{KK},j}}{\Gamma_{KK,j}}, \eqn{\boldsymbol{\Gamma}_{\mathbf{K}\boldsymbol{\eta},j}}{\Gamma_{K\eta,j}} and its transpose, and finally \eqn{\boldsymbol{\Gamma}_{\boldsymbol{\eta\eta},j}}{\Gamma_{\eta\eta,j}}. In the centered case, only \eqn{\boldsymbol{\Gamma}_{\mathbf{KK},j}}{\Gamma_{KK,j}} is computed. In the profiled non-centered case, \deqn{\boldsymbol{\Gamma}_{j}\equiv\boldsymbol{\Gamma}_{\mathbf{KK},j}-\boldsymbol{\Gamma}_{\mathbf{K}\boldsymbol{\eta},j}\boldsymbol{\Gamma}_{\boldsymbol{\eta}\boldsymbol{\eta},j}^{-1}\boldsymbol{\Gamma}_{\mathbf{K}\boldsymbol{\eta}}^{\top}.}{\Gamma_j=\Gamma_{KK,j}-\Gamma_{K\eta,j}\Gamma_{\eta\eta,j}^(-1)\Gamma_{K\eta}'.}

Similarly, in the non-profiled non-centered setting, \eqn{\boldsymbol{g}}{g} can be partitioned \eqn{p} parts, each with a \eqn{p}-vector \eqn{\boldsymbol{g}_{\mathbf{K},j}}{g_{K,j}} and a scalar \eqn{g_{\boldsymbol{\eta},j}}{g_{\eta,j}}. In the centered setting, only \eqn{\boldsymbol{g}_{\mathbf{K},j}}{g_{K,j}} is needed. In the profiled non-centered case, \deqn{\boldsymbol{g}_j\equiv\boldsymbol{g}_{\mathbf{K},j}-\boldsymbol{\Gamma}_{\mathbf{K}\boldsymbol{\eta},j}\boldsymbol{\Gamma}_{\boldsymbol{\eta\eta},j}^{-1}g_{\boldsymbol{\eta},j}.}{g_j=g_{K,j}-\Gamma_{K\eta,j}\Gamma_{\eta\eta,j}^(-1)g_{\eta,j}.}

The formulae for the pieces above are
\deqn{\boldsymbol{\Gamma}_{\mathbf{KK},j}\equiv\frac{1}{n}\sum_{i=1}^nh\left(X_j^{(i)}\right){X_j^{(i)}}^{2a-2}{\boldsymbol{X}^{(i)}}^a{{\boldsymbol{X}^{(i)}}^a}^{\top},}{\Gamma_{KK,j}=1/n*\sum_{i=1}^n h(Xij)*Xij^(2a-2)*Xi^a*(Xi^a)',}
\deqn{\boldsymbol{\Gamma}_{\mathbf{K}\boldsymbol{\eta},j}\equiv-\frac{1}{n}\sum_{i=1}^nh\left(X_j^{(i)}\right){X_j^{(i)}}^{a+b-2}{\boldsymbol{X}^{(i)}}^a,}{\Gamma_{K\eta,j}=-1/n*\sum_{i=1}^n h(Xij)*Xij^(a+b-2)*Xi^a,}
\deqn{\boldsymbol{\Gamma}_{\boldsymbol{\eta\eta},j}\equiv\frac{1}{n}\sum_{i=1}^nh\left(X_j^{(i)}\right){X_j^{(i)}}^{2b-2},}{\Gamma_{\eta\eta,j}=1/n*\sum_{i=1}^n h(Xij)*Xij^(2b-2),}
\deqn{\boldsymbol{g}_{\mathbf{K},j}\equiv\frac{1}{n}\sum_{i=1}^n\left(h'\left(X_j^{(i)}\right){X_j^{(i)}}^{a-1}+(a-1)h\left(X_j^{(i)}\right){X_j^{(i)}}^{a-2}\right){\boldsymbol{X}^{(i)}}^a+ah\left(X_j^{(i)}\right){X_j^{(i)}}^{2a-2}\boldsymbol{e}_{j,p},}{g_{K,j}=1/n*\sum_{i=1}^n (h'(Xij)*Xij^(a-1)+(a-1)*h(Xij)*Xij^(a-2))*Xi^a+a*h(Xij)*Xij^(2a-2)*e_{j,p},}
\deqn{\boldsymbol{g}_{\boldsymbol{\eta},j}\equiv\frac{1}{n}\sum_{i=1}^n-h'\left(X_j^{(i)}\right){X_j^{(i)}}^{b-1}-(b-1)h\left(X_j^{(i)}\right){X_j^{(i)}}^{b-2},}{g_{\eta,j}=1/n*\sum_{i=1}^n -h'(Xij)*Xij^(b-1)-(b-1)*h(Xij)*Xij^(b-2)),}
where \eqn{\boldsymbol{e}_{j,p}}{e_{j,p}} is the \eqn{p}-vector with 1 at the \eqn{j}-th position and 0 elsewhere.

In the profiled non-centered setting, the function also returns \eqn{t_1}{t1} and \eqn{t_2}{t2} defined as
\deqn{\boldsymbol{t}_1\equiv\boldsymbol{\Gamma}_{\boldsymbol{\eta\eta}}^{-1}\boldsymbol{g}_{\boldsymbol{\eta}},\quad\boldsymbol{t}_2\equiv\boldsymbol{\Gamma}_{\boldsymbol{\eta\eta}}^{-1}\boldsymbol{\Gamma}_{\mathbf{K}\boldsymbol{\eta}}^{\top},}{t1=\Gamma_{\eta\eta}^(-1)g_{\eta}, t2=\Gamma_{\eta\eta}^(-1)\Gamma_{K\eta}',}
so that \eqn{\hat{\boldsymbol{\eta}}=\boldsymbol{t}_1-\boldsymbol{t}_2\mathrm{vec}(\hat{\mathbf{K}}).}{\hat{\eta}=t1-t2*vec(\hat{K}).}
}
\examples{
n <- 30
p <- 10
eta <- rep(0, p)
K <- diag(p)
dm <- 1 + (1-1/(1+4*exp(1)*max(6*log(p)/n, sqrt(6*log(p)/n))))

# Gaussian on R^p:
domain <- make_domain("R", p=p)
x <- mvtnorm::rmvnorm(n, mean=solve(K, eta), sigma=solve(K))
# Equivalently:
\donttest{
x2 <- gen(n, setting="gaussian", abs=FALSE, eta=eta, K=K, domain=domain, finite_infinity=100, 
        xinit=NULL, burn_in=1000, thinning=100, verbose=FALSE)
}
elts <- get_elts(NULL, x, "gaussian", domain, centered=TRUE, scale="norm", diag=dm)
elts <- get_elts(NULL, x, "gaussian", domain, FALSE, profiled=FALSE, scale="sd", diag=dm)

# Gaussian on R_+^p:
domain <- make_domain("R+", p=p)
x <- tmvtnorm::rtmvnorm(n, mean = solve(K, eta), sigma = solve(K),
       lower = rep(0, p), upper = rep(Inf, p), algorithm = "gibbs",
       burn.in.samples = 100, thinning = 10)
# Equivalently:
\donttest{
x2 <- gen(n, setting="gaussian", abs=FALSE, eta=eta, K=K, domain=domain,
       finite_infinity=100, xinit=NULL, burn_in=1000, thinning=100, verbose=FALSE)
}
h_hp <- get_h_hp("min_pow", 1, 3)
elts <- get_elts(h_hp, x, "gaussian", domain, centered=TRUE, scale="norm", diag=dm)

# Gaussian on sum(x^2) > 1 && sum(x^(1/3)) > 1 with x allowed to be negative
domain <- make_domain("polynomial", p=p, rule="1 && 2",
       ineqs=list(list("expression"="sum(x^2)>1", abs=FALSE, nonnegative=FALSE),
                      list("expression"="sum(x^(1/3))>1", abs=FALSE, nonnegative=FALSE)))
xinit <- rep(sqrt(2/p), p)
x <- gen(n, setting="gaussian", abs=FALSE, eta=eta, K=K, domain=domain, finite_infinity=100, 
       xinit=xinit, seed=2, burn_in=1000, thinning=100, verbose=FALSE)
h_hp <- get_h_hp("min_pow", 1, 3)
elts <- get_elts(h_hp, x, "gaussian", domain, centered=FALSE,
       profiled_if_noncenter=TRUE, scale="", diag=dm)

# exp on ([0, 1] v [2,3])^p
domain <- make_domain("uniform", p=p, lefts=c(0,2), rights=c(1,3))
x <- gen(n, setting="exp", abs=FALSE, eta=eta, K=K, domain=domain,
       xinit=NULL, seed=2, burn_in=1000, thinning=100, verbose=FALSE)
h_hp <- get_h_hp("min_pow", 1.5, 3)
elts <- get_elts(h_hp, x, "exp", domain, centered=TRUE, scale="", diag=dm)
elts <- get_elts(h_hp, x, "exp", domain, centered=FALSE,
       profiled_if_noncenter=FALSE, scale="", diag=dm)

# gamma on {x1 > 1 && log(1.3) < x2 < 1 && x3 > log(1.3) && ... && xp > log(1.3)}
domain <- make_domain("polynomial", p=p, rule="1 && 2 && 3",
       ineqs=list(list("expression"="x1>1", abs=FALSE, nonnegative=TRUE),
                      list("expression"="x2<1", abs=FALSE, nonnegative=TRUE),
                      list("expression"="exp(x)>1.3", abs=FALSE, nonnegative=TRUE)))
set.seed(1)
xinit <- c(1.5, 0.5, abs(stats::rnorm(p-2))+log(1.3))
x <- gen(n, setting="gamma", abs=FALSE, eta=eta, K=K, domain=domain, finite_infinity=100, 
       xinit=xinit, seed=2, burn_in=1000, thinning=100, verbose=FALSE)
h_hp <- get_h_hp("min_pow", 1.5, 3)
elts <- get_elts(h_hp, x, "gamma", domain, centered=TRUE, scale="", diag=dm)
elts <- get_elts(h_hp, x, "gamma", domain, centered=FALSE,
       profiled_if_noncenter=FALSE, scale="", diag=dm)

# a0.6_b0.7 on {x in R_+^p: sum(log(x))<2 || (x1^(2/3)-1.3x2^(-3)<1 && exp(x1)+2.3*x2>2)}
domain <- make_domain("polynomial", p=p, rule="1 || (2 && 3)",
       ineqs=list(list("expression"="sum(log(x))<2", abs=FALSE, nonnegative=TRUE),
                      list("expression"="x1^(2/3)-1.3x2^(-3)<1", abs=FALSE, nonnegative=TRUE),
                      list("expression"="exp(x1)+2.3*x2^2>2", abs=FALSE, nonnegative=TRUE)))
xinit <- rep(1, p)
x <- gen(n, setting="ab_3/5_7/10", abs=FALSE, eta=eta, K=K, domain=domain, finite_infinity=100, 
       xinit=xinit, seed=2, burn_in=1000, thinning=100, verbose=FALSE)
h_hp <- get_h_hp("min_pow", 1.4, 3)
elts <- get_elts(h_hp, x, "ab_3/5_7/10", domain, centered=TRUE, scale="", diag=dm)
elts <- get_elts(h_hp, x, "ab_3/5_7/10", domain, centered=FALSE,
       profiled_if_noncenter=TRUE, scale="", diag=dm)

# log_log model on {x in R_+^p: sum_j j * xj <= 1}
domain <- make_domain("polynomial", p=p,
       ineqs=list(list("expression"=paste(paste(sapply(1:p,
                           function(j){paste(j, "x", j, sep="")}), collapse="+"), "<1"),
                     abs=FALSE, nonnegative=TRUE)))
x <- gen(n, setting="log_log", abs=FALSE, eta=eta, K=K, domain=domain,
       finite_infinity=100, xinit=NULL, seed=2, burn_in=1000, thinning=100,
       verbose=FALSE)
h_hp <- get_h_hp("min_pow", 2, 3)
elts <- get_elts(h_hp, x, "log_log", domain, centered=TRUE, scale="", diag=dm)
elts <- get_elts(h_hp, x, "log_log", domain, centered=FALSE,
       profiled_if_noncenter=FALSE, scale="", diag=dm)
# Example of using the uniform distance function to boundary as in Liu (2019)
g0 <- function(x) {
       row_min <- apply(x, 1, min)
       row_which_min <- apply(x, 1, which.min)
       dist_to_sum_boundary <- apply(x, 1, function(xx){
                   (1 - sum(1:p * xx)) / sqrt(p*(p+1)*(2*p+1)/6)})
       grad_sum_boundary <- -(1:p) / sqrt(p*(p+1)*(2*p+1)/6)
       g0 <- pmin(row_min, dist_to_sum_boundary)
       g0d <- t(sapply(1:nrow(x), function(i){
          if (row_min[i] < dist_to_sum_boundary[i]){
             tmp <- numeric(ncol(x)); tmp[row_which_min[i]] <- 1
          } else {tmp <- grad_sum_boundary}
          tmp
       }))
       list("g0"=g0, "g0d"=g0d)
}
elts <- get_elts(NULL, x, "exp", domain, centered=TRUE, profiled_if_noncenter=FALSE,
       scale="", diag=dm, unif_dist=g0)

# log_log_sum0 model on the simplex with K having row and column sums 0 (Aitchison model)
domain <- make_domain("simplex", p=p)
K <- -cov_cons("band", p=p, spars=3, eig=1)
diag(K) <- diag(K) - rowSums(K) # So that rowSums(K) == colSums(K) == 0
eigen(K)$val[(p-1):p] # Make sure K has one 0 and p-1 positive eigenvalues
x <- gen(n, setting="log_log_sum0", abs=FALSE, eta=eta, K=K, domain=domain,
       xinit=NULL, seed=2, burn_in=1000, thinning=100, verbose=FALSE)
h_hp <- get_h_hp("min_pow", 2, 3)
h_hp_dx <- h_of_dist(h_hp, x, domain) # h and h' applied to distance from x to boundary

# Does not assume K has 0 row and column sums
elts_simplex_0 <- get_elts(h_hp, x, "log_log", domain, centered=TRUE, profiled=FALSE,
       scale="", diag=1.5)

# If want K to have row sums and column sums equal to 0 (Aitchison); estimate off-diagonals only
elts_simplex_1 <- get_elts(h_hp, x, "log_log_sum0", domain, centered=FALSE,
       profiled=FALSE, scale="", diag=1.5)
# All entries corresponding to the diagonals of K should be 0:
max(abs(sapply(1:p, function(j){c(elts_simplex_1$Gamma_K[j, (j-1)*p+1:p],
       elts_simplex_1$Gamma_K[, (j-1)*p+j])})))
max(abs(diag(elts_simplex_1$Gamma_K_eta)))
max(abs(diag(matrix(elts_simplex_1$g_K, nrow=p))))
}
