% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getProteome.R
\name{getProteome}
\alias{getProteome}
\title{Proteome Retrieval}
\usage{
getProteome(db = "refseq", organism, reference = TRUE,
  release = NULL, gunzip = FALSE, path = file.path("_ncbi_downloads",
  "proteomes"))
}
\arguments{
\item{db}{a character string specifying the database from which the proteome
shall be retrieved:
\itemize{
\item \code{db = "refseq"}
\item \code{db = "genbank"}
\item \code{db = "ensembl"}
\item \code{db = "uniprot"}
}}

\item{organism}{there are three options to characterize an organism: 
\itemize{
\item by \code{scientific name}: e.g. \code{organism = "Homo sapiens"}
\item by \code{database specific accession identifier}: e.g. \code{organism = "GCF_000001405.37"} (= NCBI RefSeq identifier for \code{Homo sapiens})
\item by \code{taxonomic identifier from NCBI Taxonomy}: e.g. \code{organism = "9606"} (= taxid of \code{Homo sapiens})
}}

\item{reference}{a logical value indicating whether or not a proteome shall be downloaded if it isn't marked in the database as either a reference genome or a representative genome.}

\item{release}{the database release version of ENSEMBL (\code{db = "ensembl"}). Default is \code{release = NULL} meaning
that the most recent database version is used.}

\item{gunzip}{a logical value indicating whether or not files should be unzipped.}

\item{path}{a character string specifying the location (a folder) in which 
the corresponding proteome shall be stored. Default is 
\code{path = file.path("_ncbi_downloads","proteomes")}.}
}
\value{
File path to downloaded proteome.
}
\description{
Main proteome retrieval function for an organism of interest.
By specifying the scientific name of an organism of interest the 
corresponding fasta-file storing the proteome of the organism of interest
can be downloaded and stored locally. Proteome files can be retrieved from 
several databases.
}
\details{
Internally this function loads the overview.txt file from NCBI:

 refseq: \url{ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/}
 
 genbank: \url{ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/}

and creates a directory '_ncbi_downloads/proteomes' to store
the proteome of interest as fasta file for future processing.
}
\examples{
\dontrun{

# download the proteome of Arabidopsis thaliana from refseq
# and store the corresponding proteome file in '_ncbi_downloads/proteomes'
file_path <- getProteome( db       = "refseq", 
             organism = "Arabidopsis thaliana", 
             path     = file.path("_ncbi_downloads","proteomes") )

Ath_proteome <- read_proteome(file_path, format = "fasta")

# download the proteome of Arabidopsis thaliana from genbank
# and store the corresponding proteome file in '_ncbi_downloads/proteomes'
file_path <- getProteome( db       = "genbank", 
             organism = "Arabidopsis thaliana", 
             path     = file.path("_ncbi_downloads","proteomes") )

Ath_proteome <- read_proteome(file_path, format = "fasta")
}
}
\seealso{
\code{\link{getGenome}}, \code{\link{getCDS}}, \code{\link{getGFF}},
\code{\link{getRNA}}, \code{\link{getRepeatMasker}}, 
\code{\link{getAssemblyStats}}, \code{\link{meta.retrieval}}, 
\code{\link{read_proteome}}
}
\author{
Hajk-Georg Drost
}
