% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/KendallCCA.R
\name{mixedCCA}
\alias{mixedCCA}
\title{Sparse CCA for data of mixed types with BIC criterion}
\usage{
mixedCCA(
  X1,
  X2,
  type1,
  type2,
  lamseq1 = NULL,
  lamseq2 = NULL,
  nlamseq = 20,
  lam.eps = 0.01,
  w1init = NULL,
  w2init = NULL,
  BICtype,
  KendallR = NULL,
  maxiter = 100,
  tol = 0.01,
  trace = FALSE,
  lassoverbose = FALSE
)
}
\arguments{
\item{X1}{A numeric data matrix (n by p1).}

\item{X2}{A numeric data matrix (n by p2).}

\item{type1}{The type of data in \code{X1}: one of "continuous", "binary" or "trunc".}

\item{type2}{The type of data in \code{X2}: one of "continuous", "binary" or "trunc".}

\item{lamseq1}{A tuning parameter sequence for \code{X1}. The length should be the same as \code{lamseq2}.}

\item{lamseq2}{A tuning parameter sequence for \code{X2}. The length should be the same as \code{lamseq1}.}

\item{nlamseq}{The number of values in the tuning parameter sequence lambda. The default is 20.}

\item{lam.eps}{The ratio of the smallest value of lambda to the largest value of lambda. The default is 0.01.}

\item{w1init}{An initial vector of length p1 for canonical direction \eqn{w1}.}

\item{w2init}{An initial vector of length p2 for canonical direction \eqn{w2}.}

\item{BICtype}{Either 1 or 2, selecting which of the two BIC criteria is used. For more details on the two options, see the reference.}

\item{KendallR}{An estimated Kendall \eqn{\tau} matrix. The default is NULL, which means that it will be automatically estimated by Kendall's \eqn{\tau} estimator unless the user supplies one.}

\item{maxiter}{The maximum number of iterations allowed.}

\item{tol}{The desired accuracy (convergence tolerance).}

\item{trace}{If \code{trace = TRUE}, progress at each iteration will be printed. The default value is \code{FALSE}.}

\item{lassoverbose}{If \code{lassoverbose = TRUE}, all warnings from lassobic optimization regarding convergence will be printed. The default value is \code{FALSE}.}
}
\value{
\code{mixedCCA} returns a list containing
\itemize{
      \item KendallR: the estimated Kendall's \eqn{\tau} matrix.
      \item lambda_seq: the values of \code{lamseq} used for sparse CCA.
      \item w1: estimated canonical direction \eqn{w1}.
      \item w2: estimated canonical direction \eqn{w2}.
      \item cancor: estimated canonical correlation.
      \item fitresult: more details regarding the progress at each iteration.
}
}
\description{
Applies sparse canonical correlation analysis (CCA) for high-dimensional data of mixed types (continuous/binary/truncated continuous). A derived rank-based estimator is used in place of the sample correlation matrix. There are two types of BIC criteria for variable selection. We found that BIC1 works best for variable selection, whereas BIC2 works best for prediction.
}
\examples{
### Simple example: sparse CCA on two simulated truncated datasets

# Simulation settings
n <- 100          # sample size
p1 <- 15          # number of variables in the first dataset
p2 <- 10          # number of variables in the second dataset
maxcancor <- 0.9  # true canonical correlation

# Correlation structure within each dataset.
# The seed is fixed so that the random draws below are reproducible.
set.seed(0)
perm1 <- sample(seq_len(p1), size = p1)
Sigma1 <- autocor(p1, 0.7)[perm1, perm1]
blockind <- sample(seq_len(3), size = p2, replace = TRUE)
Sigma2 <- blockcor(blockind, 0.7)
mu <- rbinom(p1 + p2, 1, 0.5)

# Indicators of the true variables in each dataset:
# the first 3 variables of X1 and the first 2 variables of X2.
trueidx1 <- rep(c(1, 0), times = c(3, p1 - 3))
trueidx2 <- rep(c(1, 0), times = c(2, p2 - 2))

# Generate the two datasets
simdata <- GenerateData(
  n = n,
  trueidx1 = trueidx1,
  trueidx2 = trueidx2,
  maxcancor = maxcancor,
  Sigma1 = Sigma1,
  Sigma2 = Sigma2,
  copula1 = "exp",
  copula2 = "cube",
  muZ = mu,
  type1 = "trunc",
  type2 = "trunc",
  c1 = rep(1, p1),
  c2 = rep(0, p2)
)
X1 <- simdata[["X1"]]
X2 <- simdata[["X2"]]

# Range of the per-variable proportion of zeros (truncation level) in each dataset
range(colMeans(X1 == 0))
range(colMeans(X2 == 0))

# Kendall CCA with BIC1
kendallcca1 <- mixedCCA(
  X1, X2,
  type1 = "trunc", type2 = "trunc",
  BICtype = 1, nlamseq = 10
)

# Kendall CCA with BIC2, reusing the correlation matrix estimated in the BIC1 fit
R <- kendallcca1[["KendallR"]]
kendallcca2 <- mixedCCA(
  X1, X2,
  type1 = "trunc", type2 = "trunc",
  KendallR = R, BICtype = 2, nlamseq = 10
)
}
\references{
Yoon G., Carroll R.J. and Gaynanova I. (2020) "Sparse semiparametric canonical correlation analysis for data of mixed types" \doi{10.1093/biomet/asaa007}.
}
