% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PDE.R
\name{PDE_pdfs2table}
\alias{PDE_pdfs2table}
\title{Extracting all tables from a PDF (Portable Document Format) file}
\usage{
PDE_pdfs2table(
  pdfs,
  out = ".",
  table.heading.words = "",
  ignore.case.th = FALSE,
  out.table.format = ".csv (WINDOWS-1252)",
  dev_x = 20,
  dev_y = 9999,
  write.table.locations = FALSE,
  exp.nondetc.tabs = TRUE,
  delete = TRUE,
  verbose = TRUE
)
}
\arguments{
\item{pdfs}{String. A list of paths to the PDF files to be analyzed.}

\item{out}{String. Directory chosen to save tables in. Default:
\code{"."}.}

\item{table.heading.words}{List of strings. Table headings to be detected in
addition to the standard ones (TABLE, TAB or table plus number). Regex rules
apply (see also
\url{https://github.com/erikstricker/PDE/blob/master/inst/examples/cheetsheets/regex.pdf}).
Default = \code{""}.}

\item{ignore.case.th}{Logical. Should capitalization be ignored when matching
the additional table headings (see \code{table.heading.words})? If
\code{FALSE}, matching is case-sensitive. Default = \code{FALSE}.}

\item{out.table.format}{String. Output file format. Either comma separated
file \code{.csv} or tab separated file \code{.tsv}. The encoding indicated
in parentheses should be selected according to the operating system the
exported tables are opened in, i.e., Windows: \code{(WINDOWS-1252)}; Mac:
\code{(macintosh)}; Linux: \code{(UTF-8)}. Default:
\code{".csv (WINDOWS-1252)"}.}

\item{dev_x}{Numeric. For a table, the size of indentation which would be
considered the same column. Default: \code{20}.}

\item{dev_y}{Numeric. For a table, the vertical distance which would be
considered the same row. Can be either a number or set to dynamic detection
(\code{9999}), in which case the font size is used to detect which words are
in the same row. Default: \code{9999}.}

\item{write.table.locations}{Logical. If \code{TRUE}, a separate file with the
headings of all tables, their relative location in the generated html and
txt files, as well as information if search words were found will be
generated. Default: \code{FALSE}.}

\item{exp.nondetc.tabs}{Logical. If \code{TRUE}, if a table was detected in a
PDF file but is an image or cannot be read, the page with the table will be
exported as a png. Default: \code{TRUE}.}

\item{delete}{Logical. If \code{TRUE}, the intermediate \strong{txt},
\strong{keeplayouttxt} and \strong{html} copies of the PDF file will be 
deleted. Default: \code{TRUE}.}

\item{verbose}{Logical. Indicates whether messages will be printed in the console. Default: \code{TRUE}.}
}
\description{
\code{PDE_pdfs2table} extracts all tables from the PDF files given in
\code{pdfs} and writes the output to the folder specified by \code{out}.
}
\examples{
## Running a simple table extraction
if(PDE_check_Xpdf_install() == TRUE){
outputtables <- PDE_pdfs2table(pdf = paste0(system.file(package = "PDE"),
                 "/examples/Methotrexate/29973177_!.pdf"),
 out = paste0(system.file(package = "PDE"),"/examples/29973177_tables/"))
}

## Running the same table extraction as above with all parameters shown
if(PDE_check_Xpdf_install() == TRUE){
 outputtables <- PDE_pdfs2table(pdf = paste0(system.file(package = "PDE"),
                                 "/examples/Methotrexate/29973177_!.pdf"),
 out = paste0(system.file(package = "PDE"),"/examples/29973177_tables/"),
 dev_x = 20,
 dev_y = 9999,
 table.heading.words = "",
 ignore.case.th = FALSE,
 out.table.format = ".csv (WINDOWS-1252)",
 write.table.locations = FALSE,
 exp.nondetc.tabs = FALSE,
 delete = TRUE)
}

}
\seealso{
\code{\link{PDE_extr_data_from_pdfs}},\code{\link{PDE_pdfs2table_searchandfilter}}
}
