% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ml_feature_bucketizer.R
\name{ft_bucketizer}
\alias{ft_bucketizer}
\title{Feature Transformation -- Bucketizer (Transformer)}
\usage{
ft_bucketizer(x, input_col = NULL, output_col = NULL, splits = NULL,
  input_cols = NULL, output_cols = NULL, splits_array = NULL,
  handle_invalid = "error", uid = random_string("bucketizer_"), ...)
}
\arguments{
\item{x}{A \code{spark_connection}, \code{ml_pipeline}, or a \code{tbl_spark}.}

\item{input_col}{The name of the input column.}

\item{output_col}{The name of the output column.}

\item{splits}{A numeric vector of cutpoints, indicating the bucket boundaries.}

\item{input_cols}{Names of input columns.}

\item{output_cols}{Names of output columns.}

\item{splits_array}{Parameter for specifying multiple splits parameters. Each
element in this array can be used to map continuous features into buckets.}

\item{handle_invalid}{(Spark 2.1.0+) Param for how to handle invalid entries. Options are
'skip' (filter out rows with invalid values), 'error' (throw an error), or
'keep' (keep invalid values in a special additional bucket). Default: "error"}

\item{uid}{A character string used to uniquely identify the feature transformer.}

\item{...}{Optional arguments; currently unused.}
}
\value{
The object returned depends on the class of \code{x}.

\itemize{
  \item \code{spark_connection}: When \code{x} is a \code{spark_connection}, the function returns a \code{ml_transformer},
  a \code{ml_estimator}, or one of their subclasses. The object contains a pointer to
  a Spark \code{Transformer} or \code{Estimator} object and can be used to compose
  \code{Pipeline} objects.

  \item \code{ml_pipeline}: When \code{x} is a \code{ml_pipeline}, the function returns a \code{ml_pipeline} with
  the transformer or estimator appended to the pipeline.

  \item \code{tbl_spark}: When \code{x} is a \code{tbl_spark}, a transformer is constructed then
  immediately applied to the input \code{tbl_spark}, returning a \code{tbl_spark}
}
}
\description{
Similar to \R's \code{\link{cut}} function, this transforms a numeric column
into a discretized column, with breaks specified through the \code{splits}
parameter.
}
\examples{
\dontrun{
library(dplyr)

sc <- spark_connect(master = "local")
iris_tbl <- sdf_copy_to(sc, iris, name = "iris_tbl", overwrite = TRUE)

iris_tbl \%>\%
  ft_bucketizer(input_col  = "Sepal_Length",
                output_col = "Sepal_Length_bucket",
                splits     = c(0, 4.5, 5, 8)) \%>\%
  select(Sepal_Length, Sepal_Length_bucket, Species)
}

}
\seealso{
See \url{http://spark.apache.org/docs/latest/ml-features.html} for
  more information on the set of transformations available for DataFrame
  columns in Spark.

Other feature transformers: \code{\link{ft_binarizer}},
  \code{\link{ft_chisq_selector}},
  \code{\link{ft_count_vectorizer}}, \code{\link{ft_dct}},
  \code{\link{ft_elementwise_product}},
  \code{\link{ft_feature_hasher}},
  \code{\link{ft_hashing_tf}}, \code{\link{ft_idf}},
  \code{\link{ft_imputer}},
  \code{\link{ft_index_to_string}},
  \code{\link{ft_interaction}}, \code{\link{ft_lsh}},
  \code{\link{ft_max_abs_scaler}},
  \code{\link{ft_min_max_scaler}}, \code{\link{ft_ngram}},
  \code{\link{ft_normalizer}},
  \code{\link{ft_one_hot_encoder}}, \code{\link{ft_pca}},
  \code{\link{ft_polynomial_expansion}},
  \code{\link{ft_quantile_discretizer}},
  \code{\link{ft_r_formula}},
  \code{\link{ft_regex_tokenizer}},
  \code{\link{ft_sql_transformer}},
  \code{\link{ft_standard_scaler}},
  \code{\link{ft_stop_words_remover}},
  \code{\link{ft_string_indexer}},
  \code{\link{ft_tokenizer}},
  \code{\link{ft_vector_assembler}},
  \code{\link{ft_vector_indexer}},
  \code{\link{ft_vector_slicer}}, \code{\link{ft_word2vec}}
}
\concept{feature transformers}
