% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kerasOptimizer.R
\name{optimizer_adam}
\alias{optimizer_adam}
\title{Adam optimizer}
\usage{
optimizer_adam(
  learning_rate = 0.001,
  beta_1 = 0.9,
  beta_2 = 0.999,
  epsilon = NULL,
  decay = 0,
  amsgrad = FALSE,
  clipnorm = NULL,
  clipvalue = NULL,
  ...
)
}
\arguments{
\item{learning_rate}{float >= 0. Learning rate.}

\item{beta_1}{The exponential decay rate for the 1st moment estimates. float,
0 < beta < 1. Generally close to 1.}

\item{beta_2}{The exponential decay rate for the 2nd moment estimates. float,
0 < beta < 1. Generally close to 1.}

\item{epsilon}{float >= 0. Fuzz factor. If \code{NULL}, defaults to \code{k_epsilon()}.}

\item{decay}{float >= 0. Learning rate decay over each update.}

\item{amsgrad}{Whether to apply the AMSGrad variant of this algorithm from
the paper "On the Convergence of Adam and Beyond".}

\item{clipnorm}{Gradients will be clipped when their L2 norm exceeds this
value.}

\item{clipvalue}{Gradients will be clipped when their absolute value exceeds
this value.}

\item{...}{Unused, present only for backwards compatibility.}
}
\description{
Adam optimizer as described in
\href{https://arxiv.org/abs/1412.6980v8}{Adam - A Method for Stochastic Optimization}.
}
\note{
Default parameters follow those provided in the original paper.
}
\section{References}{

\itemize{
\item \href{https://arxiv.org/abs/1412.6980v8}{Adam - A Method for Stochastic Optimization}
\item \href{https://openreview.net/forum?id=ryQu7f-RZ}{On the Convergence of Adam and Beyond}
}
}

\seealso{
Other optimizers: 
\code{\link{optimizer_adadelta}()},
\code{\link{optimizer_adagrad}()},
\code{\link{optimizer_adamax}()},
\code{\link{optimizer_nadam}()},
\code{\link{optimizer_rmsprop}()},
\code{\link{optimizer_sgd}()}
}
\concept{optimizers}
