#' Estimation of the Lognormal distribution from group data
#'
#' The function \code{fitgroup.ln} implements the estimation of the Lognormal distribution from group
#' data in form of income shares using the non-linear least squares (NLS) and the generalised method of
#' moments (GMM) estimators.
#'
#' @inheritParams fitgroup.gb2
#' @return the function \code{fitgroup.ln} returns the following objects:
#'   \itemize{
#'     \item \code{nls.estimation} Matrix containing the parameters of the Lognormal distribution estimated
#'        by NLS and, if \code{se.nls = TRUE}, their standard errors.
#'     \item \code{nls.rss} Residual sum of squares of the NLS estimation.
#'     \item \code{gmm.estimation} Matrix containing the parameters of the Lognormal distribution estimated
#'        by GMM and, if \code{se.gmm = TRUE}, their standard errors.
#'     \item \code{gmm.rss} Weighted residual sum of squares of the GMM estimation.
#'     \item \code{gini.estimation} Vector with the survey Gini index and the estimated Gini
#'      indices using NLS and GMM whenever possible.
#'   }
#' @details The Generalised Beta of the Second Kind (GB2) is a general class of distributions that is
#' acknowledged to provide an accurate fit to income data (McDonald, 1984; McDonald and Mantrala, 1995).
#' The Lognormal distribution is a limit case of this model, defined in terms of
#' the cumulative distribution function as follows:
#'
#' \deqn{F(x; \mu, \sigma) = \Phi\bigg( \frac{log(x)- \mu}{\sigma}\bigg)}
#'
#' where \eqn{\mu} is the scale parameter and \eqn{\sigma} is the shape parameter.
#'
#' The function \code{fitgroup.ln} estimates the parameters of the Lognormal distribution using grouped data in form of
#' income shares. These data must have been generated by setting the proportion of observations in each
#' group before sampling, so that the population proportions are fixed, whereas income shares are random
#' variables. Examples of this type of data can be found in the largest datasets of grouped data,
#' including The World Income Inequality Database (UNU-WIDER, 2017), PovcalNet (World Bank, 2018) or the World Wealth
#' and Income Database (Alvaredo et al., 2018).
#'
#' For NLS, numerical optimisation is achieved using the Levenberg-Marquardt Algorithm via
#' \code{\link[minpack.lm]{nlsLM}}. We use the moment estimate of the shape parameter \eqn{\sigma}, obtained by
#' equating the sample Gini index specified by \code{gini.e} to the population Gini index, as initial value.
#' This method, however, does not provide
#' an estimate for the scale parameter because the Lorenz curve is independent of scale. The scale
#' parameter is estimated by equating the sample mean, specified by \code{pc.inc}, to the population
#' mean of the Lognormal distribution. Because NLS does not use the optimal
#' covariance matrix of the moment conditions, the standard errors of the parameters
#' are obtained by Monte Carlo simulation. Please be aware that the estimation of the standard errors
#' might take a long time, especially if the sample size is large.
#'
#' \code{fitgroup.ln} also implements a two-stage GMM estimator. In the first stage, NLS estimates
#' are obtained as described above, which are used to compute a first stage estimator
#' of the weighting matrix. The weighting matrix is used in the second stage to obtain optimally
#' weighted estimates of the parameters. The numerical optimisation is performed using
#' \code{\link{optim}} with the BFGS method. If \code{optim} reports an error, the L-BFGS method
#' is used. NLS estimates are used as initial values for the optimisation algorithm. The GMM estimation
#'  incorporates the optimal weight matrix, thus making it possible to derive the asymptotic standard
#'  errors of the parameters using results from Beach and Davidson (1983) and Hajargasht and
#'  Griffiths (2016). As in the NLS estimation, the scale parameter is obtained by matching the
#'  population mean of the Lognormal distribution to the sample mean. Hence, the standard error of the scale
#'  parameter is estimated by Monte Carlo simulation.
#'
#' The Gini index of the Lognormal distribution is computed using the function \code{gini.ln}.
#'
#'
#' @references
#'  Alvaredo, F., A. Atkinson, T. Piketty, E. Saez, and G. Zucman. The World Wealth and Income Database.
#'  \url{http://www.wid.world}.
#'
#' Beach, C.M. and R. Davidson (1983): Distribution-free statistical inference with
#' Lorenz curves and income shares, \emph{The Review of Economic Studies}, 50, 723 - 735.
#'
#'  Hajargasht, G. and W.E. Griffiths (2016): Inference for Lorenz Curves, Tech. Rep.,
#'  The University of Melbourne.
#'
#'  Jorda, V., Sarabia, J.M., & Jäntti, M. (2018). Estimation of income inequality from grouped data.
#'  arXiv preprint arXiv:1808.09831.
#'
#'  McDonald, J.B. (1984): Some Generalized Functions for the Size Distribution of Income,
#'  \emph{Econometrica}, 52, 647 - 665.
#'
#'  McDonald, J.B. and A. Mantrala (1995): The distribution of personal income: revisited,
#'  \emph{Journal of Applied Econometrics}, 10, 201 - 204.
#'
#'  UNU-WIDER (2018). World Income Inequality Database (WIID3.4).
#'  \url{https://www.wider.unu.edu/project/wiid-world-income-inequality-database}.
#'
#'  World Bank (2018). PovcalNet Data Base. Washington, DC: World Bank. \url{http://iresearch.worldbank.org/PovcalNet/home.aspx}.
#'
#' @examples
#' fitgroup.ln(y = c(9, 13, 17, 22, 39), gini.e = 0.29)
#'
#' @importFrom minpack.lm nlsLM nls.lm.control
#' @importFrom stats resid coef uniroot optim
#' @export
#'
fitgroup.ln <- function(y, x = rep(1 / length(y), length(y)), gini.e,
                        pc.inc = NULL, se.gmm = FALSE, se.nls = FALSE,
                        se.scale = FALSE, N = NULL, nrep = 10^3, grid = 1:20,
                        rescale = 1000, gini = FALSE) {
  # Fit a Lognormal distribution to grouped income-share data by NLS and,
  # when per capita income is supplied, by two-stage GMM.
  #
  # Returns a list with `nls.estimation` and `gmm.estimation` (2 x 2 matrices:
  # rows "Coef." / "se", columns "s" / "mu"), `nls.rss`, `gmm.rss` and, when
  # `gini = TRUE`, a 1 x 3 matrix `gini.estimation`.
  #
  # Relies on package helpers that are not defined in this file (lc.ln,
  # scale.ln, opt.gmm.ln, simsd.ln, gmmse.ln, gini.ln); their contracts are
  # assumed from how they are called here.

  # ---- Argument validation ------------------------------------------------
  if (length(y) != length(x)) {
    stop("x and y must be of the same length")
  }
  if (length(y) < 4) {
    stop("At least four points of the Lorenz curve are required to perform the estimation")
  }
  if (!is.numeric(gini.e)) {
    stop("Gini index is not numeric")
  }
  if (gini.e < 0 || gini.e > 1) {
    stop("Gini index must be between 0 and 1")
  }
  if (!is.null(pc.inc) && !is.numeric(pc.inc)) {
    stop("Per capita income is not numeric")
  }
  if (is.numeric(pc.inc) && pc.inc <= 0) {
    stop("Per capita GDP should be positive")
  }
  if (!is.numeric(rescale)) {
    stop("Rescale is not numeric")
  }
  if (rescale <= 0) {
    stop("Rescale must be positive")
  }
  if (any(grid <= 0)) {
    stop("Grid must be a secuence of positive numbers")
  }

  # ---- Lorenz curve points ------------------------------------------------
  # Missing values are dropped from y and x independently, so the two vectors
  # can end up with different lengths even though the raw inputs matched.
  y.obs <- as.numeric(as.vector(y[!is.na(y)]))
  x.obs <- as.numeric(as.vector(x[!is.na(x)]))
  if (length(y.obs) != length(x.obs)) {
    stop("x and y must be of the same length after removing missing values")
  }
  # Normalise to proportions and accumulate; the last point (always 1) carries
  # no information and is removed.
  share <- cumsum(y.obs / sum(y.obs))
  share <- share[-length(share)]
  cprob <- cumsum(x.obs / sum(x.obs))
  cprob <- cprob[-length(cprob)]

  # ---- NLS estimation of the shape parameter ------------------------------
  # Starting value: invert the Lognormal Gini index G = 2 * pnorm(s / sqrt(2)) - 1.
  par.s <- stats::qnorm((gini.e + 1) / 2) * 2^0.5
  regress <- try(suppressWarnings(nlsLM(share ~ (lc.ln(S, cprob)), algorithm = "port",
    start = list(S = par.s), lower = 0, control = nls.lm.control(maxiter = 1000))), silent = TRUE)
  if (inherits(regress, "try-error")) {
    # Without this check the failure would only surface later as an obscure
    # error from resid() applied to the try-error object.
    stop("NLS estimation failed: ", conditionMessage(attr(regress, "condition")))
  }
  nls.rss <- sum(resid(regress)^2)
  nls.s <- coef(regress)[1]

  # ---- Scale parameter from the mean --------------------------------------
  # The Lorenz curve does not identify the scale, so it is recovered from the
  # per capita income (rescaled by `rescale`) through scale.ln().
  if (is.null(pc.inc)) {
    temp.b <- NA
    print("Unable to compute the scale parameter and the GMM estimation. Per capita GDP not provided")
  } else if (log(pc.inc) <= nls.s^2 / 2) {
    temp.b <- NA
    print("Unable to compute the scale parameter and the GMM estimation")
  } else {
    incpc <- pc.inc / rescale
    temp.b <- scale.ln(nls.s, incpc)
  }

  # ---- GMM estimation -----------------------------------------------------
  gmm.coef <- matrix(NA, 1, 2)
  gmm.se <- matrix(NA, 1, 2)
  gmm.rss <- NA
  if (!is.na(temp.b)) {
    # NLS estimates serve both as starting values and as first-stage estimates.
    regress <- try(opt.gmm.ln(cprob, share, init.est = c(nls.s, temp.b), cons.est = c(nls.s, temp.b)))
    if (inherits(regress, "try-error")) {
      print("Unable to compute GMM estimates of the parameters. The weight martrix cannot be inverted. Try changing the value of rescale")
    } else {
      gmm.rss <- regress$opt1$value
      gmm.coef <- regress$opt1$par
      # The scale is re-derived from the mean using the GMM shape estimate.
      gmm.coef[2] <- scale.ln(gmm.coef[1], incpc)
    }
  }
  # TRUE only when the GMM step produced a shape estimate; guards gmmse.ln()
  # against being called with an all-NA coefficient vector.
  gmm.ok <- !is.na(gmm.coef[1])

  nls.coef <- matrix(c(nls.s, temp.b), 1, 2)
  nls.se <- matrix(NA, 1, 2)

  # ---- Standard errors ----------------------------------------------------
  if (se.nls == TRUE) {
    if (is.null(N)) {
      print("Unable to compute the standard errors. Please provide the sample size")
    } else {
      # One simulation run provides both the NLS standard errors and, if
      # requested, the standard error of the scale parameter.
      se.calc <- simsd.ln(x = x, theta = nls.coef, N = N, nrep = nrep, se.scale = se.scale)
      nls.se <- se.calc$nls.se
      if (!is.na(temp.b)) {
        if (se.gmm == TRUE && gmm.ok) {
          gmm.se <- gmmse.ln(gmm.coef, cprob, N)
        }
        if (se.scale == TRUE) {
          gmm.se[2] <- se.calc$sd.scale
        }
      }
    }
  }
  if (se.nls == FALSE && se.gmm == TRUE) {
    if (is.null(N)) {
      print("Unable to compute the standard errors. Please provide the sample size")
    } else if (!is.na(temp.b)) {
      if (gmm.ok) {
        gmm.se <- gmmse.ln(gmm.coef, cprob, N)
      }
      if (se.scale == TRUE) {
        gmm.se[2] <- simsd.ln(x = x, theta = nls.coef, N = N, nrep = nrep, se.scale = se.scale)$sd.scale
      }
    }
  }

  # ---- Assemble output ----------------------------------------------------
  est.names <- list(c("Coef.", "se"), c("s", "mu"))
  nls.estimation <- matrix(NA, 2, 2, dimnames = est.names)
  nls.estimation[1, ] <- nls.coef
  nls.estimation[2, ] <- nls.se

  gmm.estimation <- matrix(NA, 2, 2, dimnames = est.names)
  gmm.estimation[1, ] <- gmm.coef
  gmm.estimation[2, ] <- gmm.se

  out2 <- list(nls.estimation = nls.estimation, nls.rss = nls.rss,
    gmm.estimation = gmm.estimation, gmm.rss = gmm.rss)

  if (gini == TRUE) {
    gmm.gini <- if (!is.na(gmm.rss)) gini.ln(gmm.coef) else NA
    nls.gini <- gini.ln(nls.coef)
    gini.estimation <- matrix(NA, 1, 3)
    colnames(gini.estimation) <- c("Survey", "NLS estimate", "GMM estimate")
    gini.estimation[1] <- gini.e
    gini.estimation[2] <- nls.gini
    gini.estimation[3] <- gmm.gini
    out2$gini.estimation <- gini.estimation
  }
  return(out2)
}

