% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getProteome.R
\name{getProteome}
\alias{getProteome}
\title{Proteome Retrieval}
\usage{
getProteome(db = "refseq", kingdom, organism,
  path = file.path("_ncbi_downloads", "proteomes"))
}
\arguments{
\item{db}{a character string specifying the database from which the proteome shall be retrieved: 'refseq'.
Right now only the RefSeq database is included. Later versions of \pkg{biomartr} will also allow
sequence retrieval from additional databases.}

\item{kingdom}{a character string specifying the kingdom of the organisms of interest,
e.g. "archaea", "bacteria", "fungi", "invertebrate", "plant", "protozoa", "vertebrate_mammalian", or "vertebrate_other".}

\item{organism}{a character string specifying the scientific name of the organism of interest, e.g. 'Arabidopsis thaliana'.}

\item{path}{a character string specifying the location (a folder) in which the corresponding
proteome shall be stored. Default is \code{path} = \code{file.path("_ncbi_downloads","proteomes")}.}
}
\value{
A data.table storing the gene ids in the first column and the protein sequence in the second column.
}
\description{
This function retrieves a fasta-file storing the proteome of an organism of interest and stores
the proteome file in the folder '_ncbi_downloads/proteomes'.
}
\details{
Internally this function loads the overview.txt file from NCBI:

 refseq: \url{ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/}


and creates a directory '_ncbi_downloads/proteomes' to store
the proteome of interest as fasta file for future processing.
In case the corresponding fasta file already exists within the
'_ncbi_downloads/proteomes' folder and is accessible within the workspace,
no download process will be performed.
}
\examples{
\dontrun{

# download the proteome of Arabidopsis thaliana from refseq
# and store the corresponding proteome file in '_ncbi_downloads/proteomes'
getProteome( db       = "refseq", 
             kingdom  = "plant", 
             organism = "Arabidopsis thaliana", 
             path     = file.path("_ncbi_downloads","proteomes") )


file_path <- file.path("_ncbi_downloads","proteomes","Arabidopsis_thaliana_protein.faa.gz")
Ath_proteome <- read_proteome(file_path, format = "fasta")


}
}
\author{
Hajk-Georg Drost
}
\references{
\url{ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq}

\url{http://www.ncbi.nlm.nih.gov/refseq/about/}
}
\seealso{
\code{\link{read_proteome}}
}

