% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PINstimation.R
\docType{package}
\name{PINstimation-package}
\alias{PINstimation-package}
\title{An R package for estimating the probability of informed trading}
\description{
The package provides utilities for the estimation
of probability of informed trading measures: original PIN (\code{PIN}) as
introduced by \insertCite{Easley1992;textual}{PINstimation} and
\insertCite{Easley1996;textual}{PINstimation}
, multilayer PIN (\code{MPIN}) as introduced by
\insertCite{Ersan2016;textual}{PINstimation}, adjusted PIN (\code{AdjPIN}) model
as introduced in \insertCite{Duarte09;textual}{PINstimation}, and
volume-synchronized PIN (\code{VPIN}) as introduced by
\insertCite{Easley2011;textual}{PINstimation} and
\insertCite{Easley2012;textual}{PINstimation}. Estimations of
\code{PIN}, \code{MPIN}, and \code{AdjPIN} are subject to floating-point exception
errors, and are sensitive to the choice of initial values.
Therefore, researchers developed factorizations of the model likelihood
functions as well as algorithms for determining initial parameter sets for
the maximum likelihood estimation (MLE henceforth).
\cr\cr\cr As for the factorizations, the package includes three
different factorizations of the \code{PIN} likelihood function: \code{fact_pin_eho()}
as in \insertCite{Easley2010;textual}{PINstimation}, \code{fact_pin_lk()} as in
\insertCite{WilliamLin2011;textual}{PINstimation}, and \code{fact_pin_e()} as in
\insertCite{Ersan2016;textual}{PINstimation};
one factorization for \code{MPIN} likelihood function: \code{fact_mpin()} as in
\insertCite{Ersan2016;textual}{PINstimation}; and one factorization for
\code{AdjPIN} likelihood function: \code{fact_adjpin()} as in
\insertCite{Ersan2022b;textual}{PINstimation}.
\cr\cr The package implements three algorithms to generate initial
parameter sets for the MLE of the \code{PIN} model: \code{initials_pin_yz()}
for the algorithm of \insertCite{Yan2012;textual}{PINstimation},
\code{initials_pin_gwj()} for the algorithm of
\insertCite{Gan2015;textual}{PINstimation}, and \code{initials_pin_ea()} for the
algorithm of \insertCite{ErsanAlici2016;textual}{PINstimation}. As for the
initial parameter sets for the MLE of the \code{MPIN} model, the function
\code{initials_mpin()} implements a multilayer extension of the algorithm of
\insertCite{ErsanAlici2016;textual}{PINstimation}. Finally, three functions
implement three algorithms of initial parameter sets for the MLE of
the \code{AdjPIN} model, namely \code{initials_adjpin()} for the algorithm in
\insertCite{Ersan2022b;textual}{PINstimation}, \code{initials_adjpin_cl()}
for the algorithm of \insertCite{ChengLai2021;textual}{PINstimation}, and
\code{initials_adjpin_rnd()} for randomly generated initial parameter sets.
The choice of the initial parameter sets can be done directly, either using
specific functions implementing MLE for the \code{PIN} model, such as \code{pin_yz()},
\code{pin_gwj()}, \code{pin_ea()}; or through the argument \code{initialsets} in generic
functions implementing MLE for the \code{MPIN} and \code{AdjPIN} models, namely
\code{mpin_ml()} and \code{adjpin()}.
Besides, \code{PIN}, \code{MPIN} and \code{AdjPIN} models can be estimated using custom
initial parameter set(s) provided by the user and fed through
the argument \code{initialsets} for the functions \code{pin()}, \code{mpin_ml()} and
\code{adjpin()}. Through the function \code{get_posteriors()}, the package also
allows users to assign, for each day in the sample, the posterior
probabilities that the day is a no-information day, a good-information day,
and a bad-information day, respectively.
\cr\cr As an alternative to the standard maximum likelihood estimation,
estimation via expectation conditional maximization algorithm (\code{ECM})
is suggested in \insertCite{Ghachem2022;textual}{PINstimation}, and is
implemented through the function \code{mpin_ecm()} for the \code{MPIN} model, and
the function \code{adjpin()} for the \code{AdjPIN} model.
\cr\cr Dataset(s) of daily aggregated numbers of buys and sells with a
user-determined number of information layers can be simulated with the function
\code{generatedata_mpin()} for the \code{MPIN} (\code{PIN}) model;
and \code{generatedata_adjpin()}
for the \code{AdjPIN} model. The output of these functions contains the
theoretical parameters used in the data generation, empirical parameters
computed from the generated data, alongside the generated data itself.
Data simulation functions allow for broad customization
to produce data that fit the user's preferences. Therefore, simulated data
series can be utilized in comparative analyses for the applied methods in
different scenarios. Alternatively, the user can use two example datasets
preloaded in the package: \code{dailytrades} as representative of quarterly
trade data with daily buys and sells; and \code{hfdata} as a simulated
high-frequency dataset comprising \verb{100 000} trades.
\cr\cr Finally, the package provides two functions to deal with
high-frequency data.
First, the function \code{vpin()} estimates and provides detailed output on the
order flow toxicity metric, volume-synchronized probability of informed
trading, as developed in \insertCite{Easley2011;textual}{PINstimation} and
\insertCite{Easley2012;textual}{PINstimation}. Second, the function
\code{aggregate_trades()} aggregates the high-frequency trade-data into daily
data using several trade classification algorithms, namely the \code{tick}
algorithm, the \code{quote} algorithm, the \code{LR} algorithm
\insertCite{LeeReady1991}{PINstimation} and the \code{EMO}
algorithm \insertCite{Ellis2000}{PINstimation}.
\cr\cr The package provides fast, compact, and precise utilities to tackle
the sophisticated, error-prone, and time-consuming estimation procedure of
informed trading, and does so using solely the raw trade-level data.
\insertCite{Ghachem2022b;textual}{PINstimation}
provides a comprehensive overview of the package: it first
details the underlying theoretical background and provides a thorough
description of the functions, before using them to tackle relevant
research questions.
}
\section{Functions}{

\itemize{
\item \link{adjpin} estimates the adjusted probability of informed trading
(\code{AdjPIN}) of the model of \insertCite{Duarte09;textual}{PINstimation}.
\item \link{aggregate_trades} aggregates the trading data per day using
different trade classification algorithms.
\item \link{detectlayers_e} detects the number of information layers present
in the trade-data using the algorithm in
\insertCite{Ersan2016;textual}{PINstimation}.
\item \link{detectlayers_eg} detects the number of information layers present
in the trade-data using the algorithm in
\insertCite{Ersan2022a;textual}{PINstimation}.
\item \link{detectlayers_ecm} detects the number of information layers
present in the trade-data using the expectation-conditional maximization
algorithm in \insertCite{Ghachem2022;textual}{PINstimation}.
\item \link{fact_adjpin} returns the \code{AdjPIN} factorization of the likelihood
function by \insertCite{Ersan2022b;textual}{PINstimation} evaluated at the
provided data and parameter sets.
\item \link{fact_pin_e} returns the \code{PIN} factorization of the likelihood
function by \insertCite{Ersan2016;textual}{PINstimation} evaluated at
the provided data and parameter sets.
\item \link{fact_pin_eho} returns the \code{PIN} factorization of the likelihood
function by \insertCite{Easley2010;textual}{PINstimation} evaluated at the
provided data and parameter sets.
\item \link{fact_pin_lk} returns the \code{PIN} factorization of the likelihood
function by \insertCite{WilliamLin2011;textual}{PINstimation} evaluated
at the provided data and parameter sets.
\item \link{fact_mpin} returns the \code{MPIN} factorization of the likelihood
function by \insertCite{Ersan2016;textual}{PINstimation} evaluated at the
provided data and parameter sets.
\item \link{generatedata_adjpin} generates a dataset object or a list of
dataset objects generated according to the assumptions of the \code{AdjPIN} model.
\item \link{generatedata_mpin} generates a dataset object or a list of
dataset objects generated according to the assumptions of the \code{MPIN} model.
\item \link{get_posteriors} computes, for each day in the sample, the
posterior probabilities that it is a no-information day, good-information day
and bad-information day respectively.
\item \link{initials_adjpin} generates the initial parameter sets for the
\code{ML}/\code{ECM} estimation of the adjusted probability of informed trading using
the algorithm of \insertCite{Ersan2022b;textual}{PINstimation}.
\item \link{initials_adjpin_cl} generates the initial parameter sets for the
\code{ML}/\code{ECM} estimation of the adjusted probability of informed trading using
an extension of the algorithm of
\insertCite{ChengLai2021;textual}{PINstimation}.
\item \link{initials_adjpin_rnd} generates random parameter sets for the
estimation of the \code{AdjPIN} model.
\item \link{initials_mpin} generates initial parameter sets for the maximum
likelihood estimation of the multilayer
probability of informed trading (\code{MPIN}) using the
\insertCite{Ersan2016;textual}{PINstimation} generalization of the algorithm
in \insertCite{ErsanAlici2016;textual}{PINstimation}.
\item \link{initials_pin_ea} generates the initial parameter sets for the
maximum likelihood estimation of the probability of informed trading (\code{PIN})
using the algorithm of \insertCite{ErsanAlici2016;textual}{PINstimation}.
\item \link{initials_pin_gwj} generates the initial parameter set for the
maximum likelihood estimation of the probability of informed trading (\code{PIN})
using the algorithm of \insertCite{Gan2015;textual}{PINstimation}.
\item \link{initials_pin_yz} generates the initial parameter sets for the
maximum likelihood estimation of the probability of informed trading (\code{PIN})
using the algorithm of \insertCite{Yan2012;textual}{PINstimation}.
\item \link{mpin_ecm} estimates the multilayer probability of informed
trading (\code{MPIN}) using the expectation-conditional maximization algorithm
(\code{ECM}) as in \insertCite{Ghachem2022;textual}{PINstimation}.
\item \link{mpin_ml} estimates the multilayer probability of informed trading
(\code{MPIN}) using layer detection algorithms in
\insertCite{Ersan2016;textual}{PINstimation}, and
\insertCite{Ersan2022a;textual}{PINstimation}; and standard maximum
likelihood estimation.
\item \link{pin} estimates the probability of informed trading (\code{PIN}) using
custom initial parameter set(s) provided by the user.
\item \link{pin_bayes} estimates the probability of informed trading (\code{PIN}) using
the Bayesian approach in \insertCite{griffin2021;textual}{PINstimation}.
\item \link{pin_ea} estimates the probability of informed trading (\code{PIN})
using the initial parameter sets from the algorithm of
\insertCite{ErsanAlici2016;textual}{PINstimation}.
\item \link{pin_gwj} estimates the probability of informed trading (\code{PIN})
using the initial parameter set from the algorithm of
\insertCite{Gan2015;textual}{PINstimation}.
\item \link{pin_yz} estimates the probability of informed trading (\code{PIN})
using the initial parameter sets from the grid-search algorithm of
\insertCite{Yan2012;textual}{PINstimation}.
\item \link{vpin} estimates the volume-synchronized probability of informed
trading (\code{VPIN}).
}
}

\section{Datasets}{

\itemize{
\item \link{dailytrades} A dataframe representative of quarterly (60
trading days) data of simulated daily buys and sells.
\item \link{hfdata} A dataframe containing simulated high-frequency
trade-data on 100 000 timestamps with the variables
\verb{\{timestamp, price, volume, bid, ask\}}.
}
}

\section{Estimation results}{

\itemize{
\item \link{estimate.adjpin-class} The class \code{estimate.adjpin} stores the
estimation results of the function \code{adjpin()}.
\item \link{estimate.mpin-class} The class \code{estimate.mpin} stores the
estimation results of the \code{MPIN} model as estimated by the function
\code{mpin_ml()}.
\item \link{estimate.mpin.ecm-class} The class \code{estimate.mpin.ecm} stores
the estimation results of the \code{MPIN} model as estimated by the function
\code{mpin_ecm()}.
\item \link{estimate.pin-class} The class \code{estimate.pin} stores the
estimation results of the following \code{PIN} functions: \code{pin(), pin_yz(),
  pin_gwj()}, and \code{pin_ea()}.
\item \link{estimate.vpin-class} The class \code{estimate.vpin} stores the
estimation results of the \code{VPIN} model using the function \code{vpin()}.
}
}

\section{Data simulation}{

\itemize{
\item \link{dataset-class} The class \code{dataset} stores the result of
simulation of the aggregate daily trading data.
\item \link{data.series-class} The class \code{data.series} stores a list of
\code{dataset}.
}
}

\references{
\insertAllCited
}
\author{
Montasser Ghachem \href{mailto:montasser.ghachem@pinstimation.com}{montasser.ghachem@pinstimation.com} \cr
Department of Economics at Stockholm University, Stockholm, Sweden. \cr

Oguz Ersan \href{mailto:oguz.ersan@pinstimation.com}{oguz.ersan@pinstimation.com} \cr
Department of International Trade and Finance at Kadir Has University,
Istanbul, Turkey.\cr
}
