% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods-group-features.R
\name{groupFeatures-eic-similarity}
\alias{groupFeatures-eic-similarity}
\alias{EicSimilarityParam-class}
\alias{EicSimilarityParam}
\alias{groupFeatures,XcmsResult,EicSimilarityParam-method}
\title{Compounding/feature grouping based on similarity of extracted ion chromatograms}
\usage{
EicSimilarityParam(
  threshold = 0.9,
  n = 1,
  onlyPeak = TRUE,
  value = c("maxo", "into"),
  groupFun = groupSimilarityMatrix,
  ALIGNFUN = alignRt,
  ALIGNFUNARGS = list(tolerance = 0, method = "closest"),
  FUN = cor,
  FUNARGS = list(use = "pairwise.complete.obs"),
  ...
)

\S4method{groupFeatures}{XcmsResult,EicSimilarityParam}(object, param, msLevel = 1L)
}
\arguments{
\item{threshold}{\code{numeric(1)} with the minimal required similarity score to
group features. This is passed to the \code{groupFun} function.}

\item{n}{\code{numeric(1)} defining the total number of samples per feature group
on which this similarity calculation should be performed. This value is
rounded up to the next larger integer value.}

\item{onlyPeak}{\code{logical(1)} whether the correlation should be performed only
on the signals within the identified chromatographic peaks
(\code{onlyPeak = TRUE}, default) or all the signal from the extracted ion
chromatogram.}

\item{value}{\code{character(1)} defining whether samples should be grouped based
on the sum of the maximal peak intensity (\code{value = "maxo"}, the default)
or the integrated peak area (\code{value = "into"}) for a feature.}

\item{groupFun}{\code{function} defining the function to be used to group rows
based on a pairwise similarity matrix. Defaults to
\code{\link[=groupSimilarityMatrix]{groupSimilarityMatrix()}}.}

\item{ALIGNFUN}{\code{function} defining the function to be used to \emph{align}
chromatograms prior to similarity calculation. Defaults to
\code{ALIGNFUN = alignRt}. See \code{\link[=alignRt]{alignRt()}} and \code{\link[=compareChromatograms]{compareChromatograms()}} for
more information.}

\item{ALIGNFUNARGS}{\strong{named} \code{list} with arguments for \code{ALIGNFUN}.
Defaults to \code{ALIGNFUNARGS = list(tolerance = 0, method = "closest")}.}

\item{FUN}{\code{function} defining the function to be used to calculate a
similarity between (aligned) chromatograms. Defaults to \code{FUN = cor}.
See \code{\link[=cor]{cor()}} and \code{\link[=compareChromatograms]{compareChromatograms()}} for more information.}

\item{FUNARGS}{\strong{named} \code{list} with arguments for \code{FUN}. Defaults to
\code{FUNARGS = list(use = "pairwise.complete.obs")}.}

\item{...}{for \code{EicSimilarityParam}: additional arguments to be passed to
\code{groupFun} and \code{featureChromatograms} (such as \code{expandRt} to expand the
retention time range of each feature).}

\item{object}{\code{\link[=XcmsExperiment]{XcmsExperiment()}} or \code{\link[=XCMSnExp]{XCMSnExp()}} object with LC-MS
pre-processing results.}

\item{param}{\code{EicSimilarityParam} object with the settings for the method.}

\item{msLevel}{\code{integer(1)} defining the MS level on which the features
should be grouped.}
}
\value{
input object with feature groups added (i.e. in column
\code{"feature_group"} of its \code{featureDefinitions} data frame).
}
\description{
Features from the same originating compound are expected to share their
elution pattern (i.e. chromatographic peak shape) with it.
Thus, this method allows grouping features based on similarity of their
extracted ion chromatograms (EICs). The similarity calculation is performed
separately for each sample with the similarity score being aggregated across
samples for the final generation of the similarity matrix on which the
grouping (considering parameter \code{threshold}) will be performed.

The \code{\link[=compareChromatograms]{compareChromatograms()}} function is used for similarity calculation
which by default calculates the Pearson's correlation coefficient. The
settings for \code{compareChromatograms} can be specified with parameters
\code{ALIGNFUN}, \code{ALIGNFUNARGS}, \code{FUN} and \code{FUNARGS}. \code{ALIGNFUN} defaults to
\code{\link[=alignRt]{alignRt()}} and is the function used to \emph{align} the chromatograms before
comparison. \code{ALIGNFUNARGS} allows specifying additional arguments for the
\code{ALIGNFUN} function. It defaults to
\code{ALIGNFUNARGS = list(tolerance = 0, method = "closest")} which ensures that
data points from the same spectrum (scan, i.e. with the same retention time)
are compared between the EICs from the same sample. Parameter \code{FUN} defines
the function to calculate the similarity score and defaults to \code{FUN = cor}
and \code{FUNARGS} allows passing additional arguments to this function (defaults
to \code{FUNARGS = list(use = "pairwise.complete.obs")}). See also
\code{\link[=compareChromatograms]{compareChromatograms()}} for more information.

The grouping of features based on the EIC similarity matrix is performed
with the function specified with parameter \code{groupFun} which defaults to
\code{groupFun = groupSimilarityMatrix} which groups all rows (features) in the
similarity matrix with a similarity score larger than or equal to \code{threshold} into the
same cluster. This creates clusters of features in which \strong{all} features
have a similarity score \verb{>= threshold} with \strong{any} other feature in that
cluster. See \code{\link[=groupSimilarityMatrix]{groupSimilarityMatrix()}} for details. Additional parameters to
that function can be passed with the \code{...} argument.

This feature grouping should be called \strong{after} an initial feature
grouping by retention time (see \code{\link[=SimilarRtimeParam]{SimilarRtimeParam()}}). The feature groups
defined in column \code{"feature_group"} of \code{featureDefinitions(object)} (for
features matching \code{msLevel}) will be used and refined by this method.
Features with a value of \code{NA} in \code{featureDefinitions(object)$feature_group}
will be skipped/not considered for feature grouping.
}
\note{
At present the \code{\link[=featureChromatograms]{featureChromatograms()}} function is used to extract the
EICs for each feature, which does however use one m/z and rt range for
each feature and the EICs do thus not exactly represent the identified
chromatographic peaks of each sample (i.e. their specific m/z and
retention time ranges).

While this method can be applied to the full data set without prior
feature grouping, this is not suggested for the following reasons: I) the
selection of the top \code{n} samples with the highest signal for the
\emph{feature group} will be biased by very abundant compounds as this is
performed on the full data set (i.e. the samples with the highest overall
intensities are used for correlation of all features) and II) it is
computationally much more expensive because a pairwise correlation between
all features has to be performed.

It is also suggested to perform the correlation on a subset of samples
per feature with the highest intensities of the peaks (for that feature)
although it would also be possible to run the correlation on all samples by
setting \code{n} equal to the total number of samples in the data set. EIC
correlation should however be performed ideally on samples in which the
original compound is highly abundant to avoid correlation of missing values
or noisy peak shapes as much as possible.

By default (\code{onlyPeak = TRUE}), signal outside of the identified
chromatographic peaks is also excluded from the correlation.
}
\examples{

library(MsFeatures)
library(MsExperiment)
## Load a test data set with detected peaks
faahko_sub <- loadXcmsData("faahko_sub2")

## Disable parallel processing for this example
register(SerialParam())

## Group chromatographic peaks across samples
xodg <- groupChromPeaks(faahko_sub, param = PeakDensityParam(sampleGroups = rep(1, 3)))

## Performing a feature grouping based on EIC similarities on a single
## sample
xodg_grp <- groupFeatures(xodg, param = EicSimilarityParam(n = 1))

table(featureDefinitions(xodg_grp)$feature_group)

## Usually it is better to perform this correlation on pre-grouped features
## e.g. based on similar retention time.
xodg_grp <- groupFeatures(xodg, param = SimilarRtimeParam(diffRt = 4))
xodg_grp <- groupFeatures(xodg_grp, param = EicSimilarityParam(n = 1))

table(featureDefinitions(xodg_grp)$feature_group)
}
\seealso{
feature-grouping for a general overview.

Other feature grouping methods: 
\code{\link{groupFeatures-abundance-correlation}},
\code{\link{groupFeatures-similar-rtime}}
}
\author{
Johannes Rainer
}
\concept{feature grouping methods}
