% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calculate_distances.R
\name{calculate_distances}
\alias{calculate_distances}
\title{Compute Distance or Similarity Matrices}
\usage{
calculate_distances(
  x,
  method = "gower",
  output_format = "dist",
  squared = FALSE,
  p = NULL,
  similarity_transform = "linear",
  ...
)
}
\arguments{
\item{x}{A matrix or data.frame. Each row represents an observation.}

\item{method}{A string specifying the distance/similarity method. Supported:
\itemize{
  \item \strong{Binary}: \code{"jaccard"}, \code{"dice"}, \code{"sokal_michener"}, \code{"russell_rao"},
  \code{"sokal_sneath"}, \code{"kulczynski"}, \code{"hamming"}.
  \item \strong{Categorical}: \code{"matching_coefficient"}.
  \item \strong{Continuous}: \code{"euclidean"}, \code{"euclidean_standardized"}, \code{"manhattan"},
   \code{"minkowski"}, \code{"canberra"}, \code{"maximum"}, \code{"cosine"},
   \code{"correlation"}, \code{"mahalanobis"}.
  \item \strong{Mixed}: \code{"gower"}.
}}

\item{output_format}{Output format: \code{"dist"} (distance object), \code{"matrix"} (numeric matrix),
or \code{"similarity"} (only for binary/categorical/mixed methods).}

\item{squared}{Logical; if \code{TRUE}, returns squared distances (not applied to similarities).}

\item{p}{Numeric; the power parameter for the Minkowski distance (required if \code{method = "minkowski"}).}

\item{similarity_transform}{Character string; if \code{output_format = "similarity"}, this specifies the formula to convert distances to similarity scores.
Supported:
\itemize{
  \item \code{"linear"} (default): \eqn{s_{ij} = 1 - \delta_{ij}}
  \item \code{"sqrt"}: \eqn{s_{ij} = 1 - \delta_{ij}^2}
}}

\item{...}{Additional arguments passed to underlying functions.}
}
\value{
Depending on \code{output_format}, returns:
\itemize{
  \item dist object (if \code{output_format = "dist"})
  \item numeric matrix (if \code{output_format = "matrix"} or \code{output_format = "similarity"})
}
}
\description{
Computes a distance or similarity matrix between rows of a data frame or matrix, supporting a wide variety of distance metrics.
}
\details{
When \code{output_format = "similarity"}, the function transforms computed distances into similarity scores using one of the supported transformations.

The similarity transformation options are:
\describe{
  \item{\code{"linear"}}{Direct inversion of distance: \eqn{s_{ij} = 1 - \delta_{ij}}.}
  \item{\code{"sqrt"}}{Squared distance inversion: \eqn{s_{ij} = 1 - \delta_{ij}^2}, which may better preserve Euclidean properties.}
}
}
\examples{
# Attach the example data shipped with the package
data("Data_HC_contamination", package = "dbrobust")
df <- Data_HC_contamination

# --- Quick Example ---
# First 10 rows and columns 1-4 only, kept small for speed
numeric_data <- df[1:10, 1:4]
d_euclid <- calculate_distances(numeric_data, method = "euclidean",
                                output_format = "matrix")
\donttest{
# Reload the data and keep the first 20 rows for the extended examples
data("Data_HC_contamination", package = "dbrobust")
df <- Data_HC_contamination[1:20, ]

# Example 1: Euclidean distance (numeric variables only)
numeric_data <- df[, 1:4]
d_euclid <- calculate_distances(numeric_data, method = "euclidean",
                                output_format = "matrix")

# Example 2: Manhattan distance on the same numeric columns
d_manhattan <- calculate_distances(numeric_data, method = "manhattan",
                                   output_format = "matrix")

# Example 3: Matching Coefficient distance for categorical variables
categorical_data <- df[, 5:7]
d_match <- calculate_distances(categorical_data, method = "matching_coefficient",
                               output_format = "matrix")

# Example 4: Gower distance for mixed data
# (automatic type detection, asymmetric binary)
d_gower_asym <- calculate_distances(df, method = "gower", output_format = "dist",
                                    binary_asym = TRUE)

# Example 5: Minkowski distance with power parameter p = 3
d_minkowski <- calculate_distances(numeric_data, method = "minkowski", p = 3,
                                   output_format = "matrix")

# Example 6: Jaccard distance for binary variables
binary_data <- df[, 8:9]
d_jaccard <- calculate_distances(binary_data, method = "jaccard",
                                 output_format = "matrix")

# Example 7: Mahalanobis distance on the numeric columns
d_mahal <- calculate_distances(numeric_data, method = "mahalanobis",
                               output_format = "matrix")

# Example 8: Gower distance with the variable types assigned manually
continuous_vars <- 1:4
binary_vars <- 8:9
categorical_vars <- 5:7
d_gower_manual <- calculate_distances(df, method = "gower", output_format = "dist",
                                      continuous_cols = continuous_vars,
                                      binary_cols = binary_vars,
                                      categorical_cols = categorical_vars)
}
}
\seealso{
\code{\link[stats]{dist}} for basic distance measures,
  \code{\link[ade4]{dist.binary}} for binary distances,
  \code{\link[proxy]{dist}} for advanced metrics like cosine or correlation.
}
