% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/glm_fit.R
\name{GLM fits}
\alias{GLM fits}
\alias{fastLmBG}
\alias{fastLmBG_3d}
\alias{fastLmBG_3dY}
\alias{fastLmBG_3dY_1p}
\alias{fastLmBG_t}
\alias{fastLmBG_f}
\title{Fit design matrices to one or multiple outcomes}
\usage{
fastLmBG(
  X,
  Y,
  QR = qr.default(X),
  Q = qr_Q2(QR, n = n, p = p),
  R = qr_R2(QR, p),
  n = dim(X)[1L],
  p = QR$rank,
  ny = dim(Y)[2L],
  dfR = n - p,
  XtXinv = inv(QR)
)

fastLmBG_3d(
  X,
  Y,
  runX,
  QR = qr(X[, , runX, drop = FALSE]),
  Q = lapply(QR, qr_Q2, n = n, p = p),
  R = lapply(QR, qr_R2, p),
  n = dim(X)[1L],
  p = QR[[1L]]$rank,
  ny = length(runX),
  dfR = n - p,
  XtXinv = inv(QR)
)

fastLmBG_3dY(
  X,
  Y,
  runX,
  QR = qr(X[, , runX, drop = FALSE]),
  Q = lapply(QR, qr_Q2, n = n, p = p),
  R = lapply(QR, qr_R2, p),
  n = dim(X)[1L],
  p = QR[[1L]]$rank,
  ny = length(runX),
  dfR = n - p,
  XtXinv = inv(QR)
)

fastLmBG_3dY_1p(
  X,
  Y,
  runX,
  QR = qr(X[, , runX, drop = FALSE]),
  Q = lapply(QR, qr_Q2, diag(1L, n, 1L), n, 1L),
  R = lapply(QR, function(r) r$qr[1L]),
  n = dim(X)[1L],
  p = 1L,
  ny = length(runX),
  dfR = n - 1L,
  XtXinv = inv(QR)
)

fastLmBG_t(
  fits,
  contrasts,
  alternative = c("two.sided", "less", "greater"),
  alpha = NULL
)

fastLmBG_f(fits, contrasts, rkC = NULL, nC = length(contrasts))
}
\arguments{
\item{X}{Design matrix or 3D array of design matrices}

\item{Y}{Numeric matrix; there should be 1 column for each outcome variable
(so that in a graph-level analysis, this is a column matrix)}

\item{QR, Q, R}{The QR decomposition(s) and Q and R matrix(es) of the design
matrix(es). If \code{X} is a 3D array, these should be \emph{lists}}

\item{n, p, ny, dfR}{Integers; the number of observations, model \emph{rank},
number of regions/outcome variables, and residual degrees of freedom}

\item{XtXinv}{Numeric matrix or array; the inverse of the cross-product of
the design matrix(es)}

\item{runX}{Character vector of the regions for which the design matrix is
not singular}

\item{fits}{List object output by one of the model fitting functions (e.g.,
\code{fastLmBG})}

\item{contrasts}{Numeric matrix (for T statistics) or list of matrices (for F
statistics) specifying the contrast(s) of interest; if only one contrast is
desired, you can supply a vector (for T statistics)}

\item{alternative}{Character string, whether to do a two- or one-sided test.
Default: \code{'two.sided'}}

\item{alpha}{Numeric; the significance level used to calculate confidence
intervals. Default: \code{NULL} (no confidence intervals are calculated)}

\item{rkC, nC}{Integers; the rank of the contrast matrix and number of
contrasts, respectively (for F contrasts)}
}
\value{
A list with elements
  \item{coefficients}{Parameter estimates}
  \item{rank}{Model rank}
  \item{df.residual}{Residual degrees of freedom}
  \item{residuals}{Model residuals}
  \item{sigma}{The residual standard deviation, or \emph{root mean square
    error (RMSE)}}
  \item{fitted.values}{Model fitted values}
  \item{qr}{The design matrix QR decomposition(s)}
  \item{cov.unscaled}{The \dQuote{unscaled covariance matrix}}

\code{fastLmBG_t} -- A multidimensional array with the third
  dimension equaling the number of contrasts; each matrix contains the
  contrast of parameter estimates, standard error of the contrast,
  T-statistics, P-values, FDR-adjusted P-values, and confidence intervals (if
  \code{alpha} is given)

\code{fastLmBG_f} -- A numeric matrix with columns for the effect
  size, standard error, F statistic, P-values, and FDR-adjusted P-values
}
\description{
These are the \dQuote{base} model-fitting functions that solve the
\emph{least squares problem} to estimate model coefficients, residuals, etc.
for brain network data.

\code{fastLmBG_t} and \code{fastLmBG_f} calculate contrast-based statistics
for T or F contrasts, respectively. They accept any number of \emph{contrasts}
(i.e., a multi-row contrast matrix).
}
\section{Parameter estimation}{

These functions use the \emph{QR} decomposition to calculate the least
squares solution, which is the same approach used by the base
\code{\link[stats]{lm}} function. If we substitute \eqn{X = QR} in the
standard normal equations, the equation to be solved reduces to
\deqn{X^T X \hat{\beta} = X^T y \Rightarrow R \hat{\beta} = Q^T y}

Since \code{R} is an \emph{upper-triangular} matrix, we can use the
\code{\link{backsolve}} function which is a bit faster than
\code{\link{solve}}. In some cases, the \code{fastLmBG*} functions are as
fast as or faster than (particularly when \code{X} is not permuted) an
approach in which the normal equations are solved directly; additionally,
using the \emph{QR} method affords greater numerical stability.
}

\section{Different scenarios}{

There are a few different scenarios for fitting models of the data, with a
separate function for each:
\describe{
  \item{fastLmBG}{The main function for when there is a single design matrix
    \eqn{X} and any number of outcome variables \eqn{Y}.}
  \item{fastLmBG_3d}{Fits models when there is a different design matrix
    \eqn{X} for each region and a single outcome variable \eqn{Y}, which in
    this case will be a column matrix.}
  \item{fastLmBG_3dY}{Fits models when there is both a different design
    matrix \eqn{X} and outcome variable \eqn{Y} for each region. Occurs under
    permutation for the Freedman-Lane, ter Braak, and Still-White methods.}
  \item{fastLmBG_3dY_1p}{Fits models when there is both a different design
    and outcome variable for each region, and also when \eqn{X} is a rank-1
    matrix (i.e., it has 1 column). Only occurs under permutation with the
    Still-White method if there is a single regressor of interest.}
}

In the last case above, model coefficients are calculated by simple (i.e.,
non-matrix) algebra.
}

\section{Improving speed/efficiency}{

Speed/efficiency gains will be vast for analyses in which there is a single
design matrix \eqn{X} for all regions, there are multiple outcome variables
(i.e., vertex-level analysis), and the permutation method chosen does
not permute \eqn{X}. Specifically, these are the \emph{Freedman-Lane},
\emph{ter Braak}, and \emph{Manly} methods. Therefore, the QR decomposition, the
\eqn{Q} and \eqn{R} matrices, and the \dQuote{unscaled covariance matrix}
(which is \eqn{(X^T X)^{-1}}) only need to be calculated once for the entire
analysis. Other functions (e.g., \code{lm.fit}) would recalculate these for
each permutation.

Furthermore, this (and the other model fitting functions in the package) will
likely only work in models with full rank. I sacrifice proper error checking
in favor of speed, but hopefully any issues with the model will be identified
prior to the permutation step. Finally, the number of observations, model
rank, number of outcome variables, and degrees of freedom will not change and
therefore do not need to be recalculated (although these probably amount to a
negligible speed boost).

In case there are multiple design matrices, or the permutation method
permutes the design, then the QR decomposition will need to be calculated
each time anyway. For these cases, I use the simplified functions
\code{qr_Q2} and \code{qr_R2} to calculate the \eqn{Q} and \eqn{R} matrices,
and then the fitted values, residuals, and residual standard deviation are
calculated at the same time (whereas \code{lm.fit} and others would calculate
these each time).
}

\section{Contrast-based statistics}{

The \emph{contrast of parameter estimates}, \eqn{\gamma}, for T contrasts is
\deqn{\gamma = C \hat{\beta}}
where \eqn{C} is the contrast matrix with size \eqn{k \times p} (where
\eqn{k} is the number of contrasts) and \eqn{\hat{\beta}} is the matrix of
parameter estimates with size \eqn{p \times r} (where \eqn{r} is the number
of regions). For F contrasts, the effect size is the \emph{extra sum of
squares} and is calculated as
\deqn{\gamma^T (C (X^T X)^{-1} C^T)^{-1} \gamma}
The \emph{standard error} of a T contrast (i.e., of a single row \eqn{c} of
\eqn{C}) is
\deqn{\hat{\sigma} \sqrt{c (X^T X)^{-1} c^T}}
where \eqn{\hat{\sigma}} is the \emph{residual standard deviation} of the
model and \eqn{(X^T X)^{-1}} is the unscaled covariance matrix. The standard
error for F contrasts is simply the \emph{residual sum of squares}. P-values
and FDR-adjusted P-values (across regions) are also calculated. Finally, if
\eqn{\alpha} is provided for T contrasts, confidence limits are calculated.
}

\seealso{
randomise

Other GLM functions: 
\code{\link{GLM}},
\code{\link{GLM design}},
\code{\link{mtpc}()}
}
\author{
Christopher G. Watson, \email{cgwatson@bu.edu}
}
\concept{GLM functions}
