% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/blast.R
\name{assign_blastn}
\alias{assign_blastn}
\title{Assign taxonomy using blastn algorithm and the blast software}
\usage{
assign_blastn(
  physeq,
  ref_fasta = NULL,
  database = NULL,
  blastpath = NULL,
  behavior = c("return_matrix", "add_to_phyloseq"),
  method = c("vote", "top-hit"),
  suffix = "_blastn",
  min_id = 95,
  min_bit_score = 50,
  min_cover = 95,
  min_e_value = 1e-30,
  nb_voting = NULL,
  column_names = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"),
  vote_algorithm = c("consensus", "rel_majority", "abs_majority", "unanimity"),
  strict = FALSE,
  nb_agree_threshold = 1,
  preference_index = NULL,
  collapse_string = "/",
  replace_collapsed_rank_by_NA = TRUE,
  simplify_taxo = TRUE,
  keep_blast_metrics = FALSE,
  ...
)
}
\arguments{
\item{physeq}{(required): a \code{\link[phyloseq]{phyloseq-class}} object obtained
using the \code{phyloseq} package.}

\item{ref_fasta}{Either a DNAStringSet object or a path to a fasta
file to make the blast database. It must be in sintax format.
See \code{\link[=assign_sintax]{assign_sintax()}}.}

\item{database}{path to a blast database. Only used if ref_fasta
is not set.}

\item{blastpath}{path to blast program.}

\item{behavior}{Either "return_matrix" (default), or "add_to_phyloseq":
\itemize{
\item "return_matrix" returns a list of two matrices with taxonomic values in the
first element of the list and bootstrap values in the second one.
\item "add_to_phyloseq" returns a phyloseq object with amended slot \verb{@tax_table}.
Only available if using physeq input and not seq2search input.
}}

\item{method}{(One of "vote" or "top-hit"). If "top-hit", only the
best match is used to assign taxonomy. If "vote", the algorithm
takes all selected assignations (or \code{nb_voting} of them if \code{nb_voting} is not NULL)
and resolves conflicts using the function \code{\link[=resolve_vector_ranks]{resolve_vector_ranks()}}.}

\item{suffix}{(character, default "_blastn") The suffix to name the new columns.
If set to "", the taxa ranks are used
without suffix.}

\item{min_id}{(default: 95) the minimum identity percent required to take
a reference taxon into account.}

\item{min_bit_score}{(default: 50) the minimum bit score required to take
a reference taxon into account.}

\item{min_cover}{(default: 95) cut-off in query cover (\%) to keep result.}

\item{min_e_value}{(default: 1e-30) cut-off in e-value to keep result.
The BLAST E-value is the number of expected hits of similar quality (score)
that could be found just by chance.}

\item{nb_voting}{(Int, default NULL). The number of taxa to keep before applying
a vote to resolve conflicts. If NULL, all taxa passing the filters (min_id,
min_bit_score, min_cover and min_e_value) are selected.}

\item{column_names}{A vector of names for taxonomic ranks. Must
correspond to names in the ref_fasta files.}

\item{vote_algorithm}{the method to vote among "consensus", "rel_majority",
"abs_majority" and "unanimity". See \code{\link[=resolve_vector_ranks]{resolve_vector_ranks()}} for more details.}

\item{strict}{(Logical, default FALSE). See \code{\link[=resolve_vector_ranks]{resolve_vector_ranks()}} for more details.}

\item{nb_agree_threshold}{See \code{\link[=resolve_vector_ranks]{resolve_vector_ranks()}} for more details.}

\item{preference_index}{See \code{\link[=resolve_vector_ranks]{resolve_vector_ranks()}} for more details.}

\item{collapse_string}{See \code{\link[=resolve_vector_ranks]{resolve_vector_ranks()}} for more details.}

\item{replace_collapsed_rank_by_NA}{(Logical, default TRUE) See \code{\link[=resolve_vector_ranks]{resolve_vector_ranks()}} for more details.}

\item{simplify_taxo}{(Logical, default TRUE). Do we apply the
function \code{\link[=simplify_taxo]{simplify_taxo()}} to the phyloseq object?}

\item{keep_blast_metrics}{(Logical, default FALSE). If TRUE, the blast metrics
("Query seq. length", "Taxa seq. length", "Alignment length",  "\% id. match", "e-value",
"bit score" and "Query cover") are stored in the tax_table.}

\item{...}{Additional arguments passed on to \code{\link[=blast_pq]{blast_pq()}}}
}
\value{
\itemize{
\item If behavior == "return_matrix" :
\itemize{
\item If method = "top-hit" a matrix of taxonomic assignation
\item If method = "vote", a list of two matrices: the first is the
raw taxonomic assignation (before vote), the second is
the taxonomic assignation in which conflicts are resolved
using vote.
}
\item If behavior == "add_to_phyloseq", return a new phyloseq object
}
}
\description{
\if{html}{\out{
<a href="https://adrientaudiere.github.io/MiscMetabar/articles/Rules.html#lifecycle">
<img src="https://img.shields.io/badge/lifecycle-experimental-orange" alt="lifecycle-experimental"></a>
}}


Use the blast software.
}
\examples{
\dontrun{
ref_fasta <- Biostrings::readDNAStringSet(system.file("extdata",
  "mini_UNITE_fungi.fasta.gz",
  package = "MiscMetabar", mustWork = TRUE
))

# assign_blastn(data_fungi_mini, ref_fasta = ref_fasta) # error because there are
# not enough sequences in the db, so no blast query passes the filters.
# So we use lower score filters hereafter.

mat <- assign_blastn(data_fungi_mini,
  ref_fasta = ref_fasta,
  method = "top-hit", min_id = 70, min_e_value = 1e-3, min_cover = 50,
  min_bit_score = 20
)
head(mat)

assign_blastn(data_fungi_mini,
  ref_fasta = ref_fasta, method = "vote",
  vote_algorithm = "rel_majority", min_id = 90, min_cover = 50,
  behavior = "add_to_phyloseq"
)@tax_table

assign_blastn(data_fungi_mini,
  ref_fasta = ref_fasta, method = "vote",
  vote_algorithm = "consensus", replace_collapsed_rank_by_NA = FALSE,
  min_id = 90, min_cover = 50, behavior = "add_to_phyloseq"
)@tax_table
}
}
\author{
Adrien Taudière
}
