% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/NLR.R
\name{NLR}
\alias{NLR}
\title{DIF statistics for non-linear regression models}
\usage{
NLR(Data, group, model, constraints = NULL, type = "all", method = "nls",
    match = "zscore", anchor = 1:ncol(Data), start, p.adjust.method = "none",
    test = "LR", alpha = 0.05, initboot = TRUE, nrBo = 20, sandwich = FALSE)
}
\arguments{
\item{Data}{data.frame or matrix: dataset in which rows represent scored
examinee answers (\code{"1"} correct, \code{"0"} incorrect) and columns
correspond to the items.}

\item{group}{numeric: a binary vector of group membership (\code{"0"}
for the reference group, \code{"1"} for the focal group).}

\item{model}{character: generalized logistic regression model to be fitted. See
\strong{Details}.}

\item{constraints}{character: which parameters should be the same for both
groups. Possible values are any combinations of parameters \code{"a"},
\code{"b"}, \code{"c"}, and \code{"d"}. Default value is \code{NULL}.
See \strong{Details}.}

\item{type}{character: type of DIF to be tested. Possible values are
\code{"all"} for detecting difference in any parameter (default),
\code{"udif"} for uniform DIF only (i.e., difference in difficulty
parameter \code{"b"}),
\code{"nudif"} for non-uniform DIF only (i.e., difference in discrimination
parameter \code{"a"}),
\code{"both"} for uniform and non-uniform DIF (i.e., difference in
parameters \code{"a"} and \code{"b"}),
or any combination of parameters \code{"a"}, \code{"b"}, \code{"c"}, and
\code{"d"}. Can be specified as a single value (for all items) or as an
item-specific vector.}

\item{method}{character: an estimation method to be applied. The options are
\code{"nls"} for non-linear least squares (default), \code{"mle"} for the
maximum likelihood method using the \code{"L-BFGS-B"} algorithm with
constraints, \code{"em"} for the maximum likelihood estimation with the EM
algorithm, \code{"plf"} for the maximum likelihood estimation with the
algorithm based on parametric link function, and \code{"irls"} for the maximum
likelihood estimation with the iteratively reweighted least squares algorithm
(available for the \code{"2PL"} model only). See \strong{Details}.}

\item{match}{character or numeric: matching criterion to be used as
an estimate of the trait. It can be either \code{"zscore"} (default,
standardized total score), \code{"score"} (total test score), or
a numeric vector of the same length as the number of observations in
\code{Data}.}

\item{anchor}{character or numeric: specification of DIF free items. A vector
of item identifiers (integers specifying the column number) specifying
which items are currently considered as anchor (DIF free) items. Argument
is ignored if the \code{match} is not \code{"zscore"} or \code{"score"}.}

\item{start}{numeric: initial values for the estimation of item parameters. If
not specified, starting values are calculated with the
\code{\link[difNLR]{startNLR}} function. Otherwise, a list with as many
elements as the number of items. Each element is a named numeric vector
representing initial values for estimation of item parameters. Specifically,
parameters \code{"a"}, \code{"b"}, \code{"c"}, and \code{"d"} are initial
values for discrimination, difficulty, guessing, and inattention for the
reference group. Parameters \code{"aDif"}, \code{"bDif"}, \code{"cDif"}, and
\code{"dDif"} are then differences in these parameters between the reference
and focal groups. For \code{method = "irls"}, default initial values from
the \code{\link[stats]{glm}} function are used.}

\item{p.adjust.method}{character: a method for a multiple comparison
correction. Possible values are \code{"holm"}, \code{"hochberg"},
\code{"hommel"}, \code{"bonferroni"}, \code{"BH"}, \code{"BY"},
\code{"fdr"}, and \code{"none"} (default). For more details see
\code{\link[stats]{p.adjust}}.}

\item{test}{character: a statistical test to be performed for DIF detection.
Can be either \code{"LR"} for the likelihood ratio test of a submodel
(default), \code{"W"} for Wald's test, or \code{"F"} for the F-test of
a submodel.}

\item{alpha}{numeric: a significance level (the default is 0.05).}

\item{initboot}{logical: in the case of convergence issues, should starting
values be re-calculated based on bootstrapped samples? (the default is
\code{TRUE}; newly calculated initial values are applied only to
items/models with convergence issues).}

\item{nrBo}{numeric: the maximal number of iterations for the calculation of
starting values using bootstrapped samples (the default is 20).}

\item{sandwich}{logical: should the sandwich estimator be applied for
computation of the covariance matrix of item parameters when using
\code{method = "nls"}? (the default is \code{FALSE}).}
}
\value{
A list with the following elements:
\describe{
  \item{\code{Sval}}{the values of the \code{test} statistics.}
  \item{\code{pval}}{the p-values by the \code{test}.}
  \item{\code{adjusted.pval}}{adjusted p-values by the \code{p.adjust.method}.}
  \item{\code{df}}{the degrees of freedom of the \code{test}.}
  \item{\code{test}}{used test.}
  \item{\code{par.m0}}{the matrix of estimated item parameters for the null model.}
  \item{\code{se.m0}}{the matrix of standard errors of item parameters for the null model.}
  \item{\code{cov.m0}}{list of covariance matrices of item parameters for the null model.}
  \item{\code{par.m1}}{the matrix of estimated item parameters for the alternative model.}
  \item{\code{se.m1}}{the matrix of standard errors of item parameters for the alternative model.}
  \item{\code{cov.m1}}{list of covariance matrices of item parameters for the alternative model.}
  \item{\code{cf}}{numeric: the number of convergence issues.}
  \item{\code{cf.which}}{the indicators of the items that did not converge.}
  \item{\code{ll.m0}}{log-likelihood of null model.}
  \item{\code{ll.m1}}{log-likelihood of alternative model.}
  \item{\code{startBo0}}{the binary matrix. Columns represent iterations of initial values
  re-calculations, rows represent items. The value of 0 means no convergence issue in the null model,
  1 means convergence issue in the null model.}
  \item{\code{startBo1}}{the binary matrix. Columns represent iterations of initial values
  re-calculations, rows represent items. The value of 0 means no convergence issue in the alternative model,
  1 means convergence issue in the alternative model.}
}
}
\description{
Calculates likelihood ratio test statistics, F-test statistics, or Wald's
test statistics for DIF detection among dichotomous items using non-linear
regression models (generalized logistic regression models).
}
\details{
The function calculates test statistics using a DIF detection procedure based
on non-linear regression models (i.e., extensions of the logistic regression
procedure; Swaminathan & Rogers, 1990; Drabinova & Martinkova, 2017).

The unconstrained form of the 4PL generalized logistic regression model for
probability of correct answer (i.e., \eqn{Y_{pi} = 1}) using IRT
parameterization is
\deqn{P(Y_{pi} = 1|X_p, G_p) = (c_{iR} \cdot (1 - G_p) + c_{iF} \cdot G_p) +
(d_{iR} \cdot (1 - G_p) + d_{iF} \cdot G_p - c_{iR} \cdot (1 - G_p) - c_{iF} \cdot
G_p) / (1 + \exp(-(a_i + a_{i\text{DIF}} \cdot G_p) \cdot
(X_p - b_i - b_{i\text{DIF}} \cdot G_p))), }
where \eqn{X_p} is the matching criterion (e.g., standardized total score)
and \eqn{G_p} is a group membership variable for respondent \eqn{p}.
Parameters \eqn{a_i}, \eqn{b_i}, \eqn{c_{iR}}, and \eqn{d_{iR}} are
discrimination, difficulty, guessing, and inattention for the reference group
for item \eqn{i}. Terms \eqn{a_{i\text{DIF}}} and \eqn{b_{i\text{DIF}}} then
represent differences between the focal and reference groups in
discrimination and difficulty for item \eqn{i}. Terms \eqn{c_{iF}} and
\eqn{d_{iF}} are guessing and inattention parameters for the focal group for
item \eqn{i}. In the case that there is no assumed difference between the
reference and focal group in the guessing or inattention parameters, the
terms \eqn{c_i} and \eqn{d_i} are used.

Alternatively, intercept-slope parameterization may be applied:
\deqn{P(Y_{pi} = 1|X_p, G_p) = (c_{iR} \cdot (1 - G_p) + c_{iF} \cdot G_p) +
(d_{iR} \cdot (1 - G_p) + d_{iF} \cdot G_p - c_{iR} \cdot (1 - G_p) - c_{iF} \cdot
G_p) / (1 + \exp(-(\beta_{i0} + \beta_{i1} \cdot X_p +
\beta_{i2} \cdot G_p + \beta_{i3} \cdot X_p \cdot G_p))), }
where parameters \eqn{\beta_{i0}, \beta_{i1}, \beta_{i2}, \beta_{i3}} are
intercept, effect of the matching criterion, effect of the group membership,
and their mutual interaction, respectively.

The \code{model} and \code{constraints} arguments can further constrain the
4PL model. The arguments \code{model} and \code{constraints} can also be
combined. Both arguments can be specified as a single value (for all items)
or as an item-specific vector (where each element corresponds to one item).

The \code{model} argument offers several predefined models. The options are as follows:
\code{Rasch} for 1PL model with discrimination parameter fixed at 1 for both groups,
\code{1PL} for 1PL model with discrimination parameter set the same for both groups,
\code{2PL} for logistic regression model,
\code{3PLcg} for 3PL model with fixed guessing for both groups,
\code{3PLdg} for 3PL model with fixed inattention for both groups,
\code{3PLc} (alternatively also \code{3PL}) for 3PL regression model with guessing parameter,
\code{3PLd} for 3PL model with inattention parameter,
\code{4PLcgdg} for 4PL model with fixed guessing and inattention parameter for both groups,
\code{4PLcgd} (alternatively also \code{4PLd}) for 4PL model with fixed guessing for both groups,
\code{4PLcdg} (alternatively also \code{4PLc}) for 4PL model with fixed inattention for both groups,
or \code{4PL} for 4PL model.

The function uses intercept-slope parameterization for the estimation via the
\code{\link[difNLR]{estimNLR}} function. Item parameters are then
re-calculated into the IRT parameterization using the delta method.

The function offers either the non-linear least squares estimation via the
\code{\link[stats]{nls}} function (Drabinova & Martinkova, 2017; Hladka &
Martinkova, 2020), the maximum likelihood method with the \code{"L-BFGS-B"}
algorithm with constraints via the \code{\link[stats]{optim}} function
(Hladka & Martinkova, 2020), the maximum likelihood method with the EM
algorithm (Hladka, Martinkova, & Brabec, 2025), the maximum likelihood method
with the algorithm based on parametric link function (Hladka, Martinkova, &
Brabec, 2025), or the maximum likelihood method with the iteratively
reweighted least squares algorithm via the \code{\link[stats]{glm}} function.
}
\examples{
\dontrun{
# loading data
data(GMAT)
Data <- GMAT[, 1:20] # items
group <- GMAT[, "group"] # group membership variable

# testing both DIF effects using the LR test (default)
# and the model with fixed guessing for both groups
NLR(Data, group, model = "3PLcg")

# using the F test and Wald's test
NLR(Data, group, model = "3PLcg", test = "F")
NLR(Data, group, model = "3PLcg", test = "W")

# using the Benjamini-Hochberg correction
NLR(Data, group, model = "3PLcg", p.adjust.method = "BH")

# 4PL model with the same guessing and inattention
# to test uniform DIF
NLR(Data, group, model = "4PLcgdg", type = "udif")

# 2PL model to test non-uniform DIF
NLR(Data, group, model = "2PL", type = "nudif")

# 4PL model with fixed a and c parameters
# to test difference in parameter b
NLR(Data, group, model = "4PL", constraints = "ac", type = "b")

# using various estimation algorithms
NLR(Data, group, model = "3PLcg", method = "nls")
NLR(Data, group, model = "3PLcg", method = "mle")
NLR(Data, group, model = "3PLcg", method = "em")
NLR(Data, group, model = "3PLcg", method = "plf")
NLR(Data, group, model = "2PL", method = "irls")
}

}
\references{
Drabinova, A. & Martinkova, P. (2017). Detection of differential item
functioning with nonlinear regression: A non-IRT approach accounting for
guessing. Journal of Educational Measurement, 54(4), 498--517,
\doi{10.1111/jedm.12158}.

Hladka, A. (2021). Statistical models for detection of differential item
functioning. Dissertation thesis. Faculty of Mathematics and Physics, Charles
University.

Hladka, A. & Martinkova, P. (2020). difNLR: Generalized logistic regression
models for DIF and DDF detection. The R Journal, 12(1), 300--323,
\doi{10.32614/RJ-2020-014}.

Hladka, A., Martinkova, P., & Brabec, M. (2025). New iterative algorithms
for estimation of item functioning. Journal of Educational and Behavioral
Statistics. Online first, \doi{10.3102/10769986241312354}.

Swaminathan, H. & Rogers, H. J. (1990). Detecting differential item
functioning using logistic regression procedures. Journal of Educational
Measurement, 27(4), 361--370, \doi{10.1111/j.1745-3984.1990.tb00754.x}.
}
\seealso{
\code{\link[stats]{p.adjust}}
}
\author{
Adela Hladka (nee Drabinova) \cr
Institute of Computer Science of the Czech Academy of Sciences \cr
\email{hladka@cs.cas.cz} \cr

Patricia Martinkova \cr
Institute of Computer Science of the Czech Academy of Sciences \cr
\email{martinkova@cs.cas.cz} \cr

Karel Zvara \cr
Faculty of Mathematics and Physics, Charles University \cr
}
\keyword{DIF}
