% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/spatial_clustering_cv.R
\name{spatial_clustering_cv}
\alias{spatial_clustering_cv}
\title{Spatial Clustering Cross-Validation}
\usage{
spatial_clustering_cv(
  data,
  v = 10,
  cluster_function = c("kmeans", "hclust"),
  radius = NULL,
  buffer = NULL,
  ...,
  repeats = 1,
  distance_function = function(x) as.dist(sf::st_distance(x))
)
}
\arguments{
\item{data}{A data frame or an \code{sf} object (often from \code{\link[sf:st_read]{sf::read_sf()}}
or \code{\link[sf:st_as_sf]{sf::st_as_sf()}}), to split into folds.}

\item{v}{The number of partitions of the data set.}

\item{cluster_function}{Which function should be used for clustering?
Options are either \code{"kmeans"} (to use \code{\link[stats:kmeans]{stats::kmeans()}})
or \code{"hclust"} (to use \code{\link[stats:hclust]{stats::hclust()}}). You can also provide your own
function; see Details.}

\item{radius}{Numeric: points within this distance of the initially-selected
test points will be assigned to the assessment set. If \code{NULL}, no radius is
applied.}

\item{buffer}{Numeric: points within this distance of any point in the
test set (after \code{radius} is applied) will be assigned to neither the analysis
nor the assessment set. If \code{NULL}, no buffer is applied.}

\item{...}{Extra arguments passed on to \code{\link[stats:kmeans]{stats::kmeans()}} or
\code{\link[stats:hclust]{stats::hclust()}}.}

\item{repeats}{The number of times to repeat the clustered partitioning.}

\item{distance_function}{Which function should be used for distance
calculations? Defaults to \code{\link[sf:geos_measures]{sf::st_distance()}}, with the output matrix
converted to a \code{\link[stats:dist]{stats::dist()}} object. You can also provide your own
function; see Details.}
}
\value{
A tibble with classes \code{spatial_clustering_cv}, \code{spatial_rset},
\code{rset}, \code{tbl_df}, \code{tbl}, and \code{data.frame}.
The results include a column for the data split objects and
an identification variable \code{id}.
Resamples created from non-\code{sf} objects will not have the
\code{spatial_rset} class.
}
\description{
Spatial clustering cross-validation splits the data into V disjoint groups
by clustering points based on their spatial coordinates.
A resample of the analysis data consists of V-1 of the folds/clusters
while the assessment set contains the final fold/cluster.
}
\details{
Clusters are created based on the distances between observations
if \code{data} is an \code{sf} object. Each cluster is used as a fold for
cross-validation. Depending on how the data are distributed spatially, there
may not be an equal number of observations in each fold.

You can optionally provide a custom function to \code{distance_function}. The
function should take an \code{sf} object and return a \code{\link[stats:dist]{stats::dist()}} object with
distances between data points.

You can optionally provide a custom function to \code{cluster_function}. The
function must take three arguments:
\itemize{
\item \code{dists}, a \code{\link[stats:dist]{stats::dist()}} object with distances between data points
\item \code{v}, a length-1 numeric for the number of folds to create
\item \code{...}, to pass any additional named arguments to your function
}

The function should return a vector of cluster assignments of length
\code{nrow(data)}, with each element of the vector corresponding to the matching
row of the data frame.
}
\section{Changes in spatialsample 0.3.0}{

As of spatialsample version 0.3.0, this function no longer accepts non-\code{sf}
objects as arguments to \code{data}. In order to perform clustering with
non-spatial data, consider using \code{\link[rsample:clustering_cv]{rsample::clustering_cv()}}.

Also as of version 0.3.0, this function now calculates edge-to-edge distance
for non-point geometries, in line with the rest of the package. Earlier
versions relied upon between-centroid distances.
}

\examples{
\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
data(Smithsonian, package = "modeldata")

smithsonian_sf <- sf::st_as_sf(
  Smithsonian,
  coords = c("longitude", "latitude"),
  # Set CRS to WGS84
  crs = 4326
)

# When providing sf objects, coords are inferred automatically
spatial_clustering_cv(smithsonian_sf, v = 5)

# Can use hclust instead:
spatial_clustering_cv(smithsonian_sf, v = 5, cluster_function = "hclust")
\dontshow{\}) # examplesIf}
}
\references{
A. Brenning, "Spatial cross-validation and bootstrap for the assessment of
prediction rules in remote sensing: The R package sperrorest," 2012 IEEE
International Geoscience and Remote Sensing Symposium, Munich, 2012,
pp. 5372-5375, doi: 10.1109/IGARSS.2012.6352393.
}
