% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kkmeans.R
\name{kkmeans}
\alias{kkmeans}
\title{An Efficient Kernel K-Means Algorithm}
\usage{
kkmeans(
  data,
  k,
  kern = "g",
  param = 1,
  param2 = 1,
  nstart = 10,
  iter_max = 1000L,
  estimate = FALSE,
  nn = 0,
  init_centers = sample(1:k, size = nrow(data), replace = TRUE),
  method = c("otqt", "macqueen", "lloyd", "ot"),
  trueest = FALSE,
  kmat = NULL,
  random_centers = TRUE
)
}
\arguments{
\item{data}{Numeric data to cluster. This will be converted to a matrix using \code{as.matrix}.}

\item{k}{Number of clusters.}

\item{kern}{Kernel to use, one of ('gaussian', 'poly', 'sigmoid', 'laplacian').}

\item{param}{Value of the parameter to pass to the kernel function (e.g., sigma in
the Gaussian kernel). The Gaussian kernel is K(x, y) = exp(-||x - y||^2 / (2 * \code{param})),
and the polynomial kernel is K(x, y) = (x'y + a) ^ \code{param}, where a is the
offset given by \code{param2}.}

\item{param2}{Value of the second parameter to pass to the kernel
function, which corresponds to the offset for the sigmoid and polynomial
kernels.}

\item{nstart}{Number of times to run the algorithm. The run with the lowest
total within-cluster SSE (in feature space) will be returned.}

\item{iter_max}{The maximum number of iterations to allow.}

\item{estimate}{If using the Gaussian kernel, specifying \code{estimate = "mknn"} will use
an \code{nn}-nearest neighbor method for estimating \code{param}.}

\item{nn}{How many neighbors to consider for mknn estimation.}

\item{init_centers}{The initial values for cluster membership. If \code{nstart} is greater
than 1, any start beyond the first iteration will use randomized centers.}

\item{method}{Which method to use for kernel k-means iteration. One of ("otqt", "macqueen", "lloyd", "ot").
"otqt" is a method using optimal-transfer and quick-transfer heuristics similar to the Hartigan and
Wong algorithm for k-means clustering.}

\item{trueest}{Whether or not the within-cluster sum of squares should be
recomputed in R after clustering is finished}

\item{kmat}{kernel matrix, if using a custom kernel}

\item{random_centers}{if TRUE, then assign \code{k} observations as initial
clusters, assigning the remaining observations to the closest cluster.
Otherwise, assign all observations to clusters at random.}
}
\value{
A list containing the following useful information
\describe{
\item{cluster}{The final cluster membership.}
\item{centers}{A k x p matrix, the rows of which contain the centers of the clusters in R^p, the
input space (not to be confused with the cluster centers in feature space).}
\item{wss}{The within-cluster sum of squares for each cluster in feature space.}
\item{param}{The parameter value used.}
}
}
\description{
Performs kernel k-means with the specified kernel using an
optimal-transfer quick-transfer algorithm.
}
\examples{
data <- as.matrix(iris[, 1:4])

# cluster using linear kernel (normal k-means)
result <- kkmeans(data, k = 3, kern = "poly", param = 1)

# cluster using gaussian kernel
# estimating the parameter with 3-nearest neighbors
result <- kkmeans(data, k = 3, kern = "g", estimate = "mknn", nn = 3)
}
