% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils-motif.R
\name{utils-motif}
\alias{utils-motif}
\alias{add_gap}
\alias{average_ic}
\alias{compare_columns}
\alias{consensus_to_ppm}
\alias{consensus_to_ppmAA}
\alias{get_consensus}
\alias{get_consensusAA}
\alias{get_matches}
\alias{get_scores}
\alias{icm_to_ppm}
\alias{motif_range}
\alias{motif_score}
\alias{log_string_pval}
\alias{pcm_to_ppm}
\alias{position_icscore}
\alias{ppm_to_icm}
\alias{ppm_to_pcm}
\alias{ppm_to_pwm}
\alias{prob_match}
\alias{prob_match_bkg}
\alias{pwm_to_ppm}
\alias{round_motif}
\alias{score_match}
\alias{summarise_motifs}
\alias{ungap}
\title{Motif-related utility functions.}
\usage{
add_gap(motif, gaploc = ncol(motif)\%/\%2, mingap = 1, maxgap = 5)

average_ic(motifs, average = c("a.mean", "g.mean", "median", "fzt"))

compare_columns(x, y, method, bkg1 = rep(1/length(x), length(x)),
  bkg2 = rep(1/length(y), length(y)), nsites1 = 100, nsites2 = 100)

consensus_to_ppm(letter)

consensus_to_ppmAA(letter)

get_consensus(position, alphabet = "DNA", type = "PPM", pseudocount = 1)

get_consensusAA(position, type = "PPM", pseudocount = 0)

get_matches(motif, score, allow.nonfinite = FALSE)

get_scores(motif, allow.nonfinite = FALSE)

icm_to_ppm(position)

motif_range(motif, use.freq = 1, allow.nonfinite = FALSE)

motif_score(motif, threshold = c(0, 1), use.freq = 1,
  allow.nonfinite = FALSE, threshold.type = c("total", "fromzero"))

log_string_pval(pval)

pcm_to_ppm(position, pseudocount = 0)

position_icscore(position, bkg = numeric(), type = "PPM",
  pseudocount = 1, nsites = 100, relative_entropy = FALSE,
  schneider_correction = FALSE)

ppm_to_icm(position, bkg = numeric(), schneider_correction = FALSE,
  nsites = 100, relative_entropy = FALSE)

ppm_to_pcm(position, nsites = 100)

ppm_to_pwm(position, bkg = numeric(), pseudocount = 1, nsites = 100,
  smooth = TRUE)

prob_match(motif, match, allow.zero = TRUE)

prob_match_bkg(bkg, match)

pwm_to_ppm(position, bkg = numeric())

round_motif(motif, pct.tolerance = 0.05)

score_match(motif, match, allow.nonfinite = FALSE)

summarise_motifs(motifs, na.rm = TRUE)

ungap(motif, delete = FALSE)
}
\arguments{
\item{motif}{Motif object to calculate scores from, or add/remove gap, or round.}

\item{gaploc}{\code{numeric} Motif gap locations. The gap occurs immediately after
every position value. If missing, uses \code{ncol(motif) \%/\% 2}.}

\item{mingap}{\code{numeric} Minimum gap size. Must have one value for every location.
If missing, set to 1.}

\item{maxgap}{\code{numeric} Maximum gap size. Must have one value for every location.
If missing, set to 5.}

\item{motifs}{\code{list} A list of \linkS4class{universalmotif} motifs.}

\item{average}{\code{character(1)} One of \code{c("a.mean", "g.mean", "median", "fzt")}.
How to calculate the average motif information content.}

\item{x}{\code{numeric} First column for comparison.}

\item{y}{\code{numeric} Second column for comparison.}

\item{method}{\code{character(1)} Column comparison metric. See \code{\link[=compare_motifs]{compare_motifs()}}
for details.}

\item{bkg1}{\code{numeric} Vector of background probabilities for the first column.
Only relevant if \code{method = "ALLR"}.}

\item{bkg2}{\code{numeric} Vector of background probabilities for the second column.
Only relevant if \code{method = "ALLR"}.}

\item{nsites1}{\code{numeric(1)} Number of sites for the first column. Only relevant
if \code{method = "ALLR"}.}

\item{nsites2}{\code{numeric(1)} Number of sites for the second column. Only relevant
if \code{method = "ALLR"}.}

\item{letter}{\code{character(1)} Any DNA, RNA, or AA IUPAC letter. Ambiguity letters
are accepted.}

\item{position}{\code{numeric} A numeric vector representing the frequency or
probability for each alphabet letter at a specific position.}

\item{alphabet}{\code{character(1)} One of \code{c('DNA', 'RNA')}.}

\item{type}{\code{character(1)} One of \code{c('PCM', 'PPM', 'PWM', 'ICM')}.}

\item{pseudocount}{\code{numeric(1)} Used to prevent zeroes in motif matrix.}

\item{score}{\code{numeric(1)} Logodds motif score.}

\item{allow.nonfinite}{\code{logical(1)} If \code{FALSE}, then apply a pseudocount if
non-finite values are found in the PWM. Note that if the motif has a
pseudocount greater than zero and the motif is not currently of type PWM,
then this parameter has no effect as the pseudocount will be
applied automatically when the motif is converted to a PWM internally. This
value is set to \code{FALSE} by default in order to stay consistent with
pre-version 1.8.0 behaviour. A message will be printed if a pseudocount
is applied. To disable this, set \code{options(pseudocount.warning=FALSE)}.}

\item{use.freq}{\code{numeric(1)} Use regular motif or the respective \code{multifreq}
representation.}

\item{threshold}{\code{numeric} Any number of numeric values between 0 and 1
representing score percentage.}

\item{threshold.type}{\code{character} For \code{"total"}, a threshold of zero
represents the minimum possible score. This means the range of scores that
can be extracted is from the minimum to the maximum possible scores. For
\code{"fromzero"}, a threshold of zero is a score of zero. This means the range
of scores is from zero to the maximum. The \code{"total"} threshold type can
only be used if no non-finite values are present in the PWM.}

\item{pval}{\code{character(1)} String-formatted p-value.}

\item{bkg}{\code{numeric} Should be the same length as the alphabet length.}

\item{nsites}{\code{numeric(1)} Number of sites motif originated from.}

\item{relative_entropy}{\code{logical(1)} Calculate information content as
relative entropy or Kullback-Leibler divergence.}

\item{schneider_correction}{\code{logical(1)} Apply sample size correction.}

\item{smooth}{\code{logical(1)} Apply pseudocount correction.}

\item{match}{\code{character} Sequence string to calculate score from.}

\item{allow.zero}{\code{logical(1)} If \code{FALSE}, apply a pseudocount if zero values
are found in the background frequencies.}

\item{pct.tolerance}{\code{numeric(1)} or \code{character(1)} The minimum tolerated
proportion each letter must represent per position in order not to be
rounded off, either as a numeric value from 0 to 1 or a percentage written as
a string from "0\%" to "100\%".}

\item{na.rm}{\code{logical} Remove columns where all values are \code{NA}.}

\item{delete}{\code{logical(1)} Clear gap information from motif. If \code{FALSE}, then
it can be reactivated simply with \code{add_gap(motif)}.}
}
\value{
For \code{\link[=consensus_to_ppm]{consensus_to_ppm()}} and \code{\link[=consensus_to_ppmAA]{consensus_to_ppmAA()}}: a numeric
vector of length 4 and 20, respectively.

For \code{\link[=get_consensus]{get_consensus()}} and \code{\link[=get_consensusAA]{get_consensusAA()}}: a character vector
of length 1.

For \code{\link[=get_matches]{get_matches()}}: a \code{character} vector of motif matches.

For \code{\link[=motif_range]{motif_range()}}: a named \code{numeric} vector of motif scores.

For \code{\link[=motif_score]{motif_score()}}: a named \code{numeric} vector of motif scores.

For \code{\link[=log_string_pval]{log_string_pval()}}: a \code{numeric} vector of length 1.

For \code{\link[=position_icscore]{position_icscore()}}: a \code{numeric} vector of length 1.

For \code{\link[=ppm_to_icm]{ppm_to_icm()}}, \code{\link[=icm_to_ppm]{icm_to_ppm()}}, \code{\link[=pcm_to_ppm]{pcm_to_ppm()}},
\code{\link[=ppm_to_pcm]{ppm_to_pcm()}}, \code{\link[=ppm_to_pwm]{ppm_to_pwm()}}, and \code{\link[=pwm_to_ppm]{pwm_to_ppm()}}: a \code{numeric}
vector with length equal to input \code{numeric} vector.

For \code{\link[=prob_match]{prob_match()}}: a \code{numeric} vector of probabilities.

For \code{\link[=round_motif]{round_motif()}}: the input motif, rounded.

For \code{\link[=score_match]{score_match()}}: a \code{numeric} vector with the match motif score.

For \code{\link[=summarise_motifs]{summarise_motifs()}}: a \code{data.frame} with columns representing
the \linkS4class{universalmotif} slots.
}
\description{
Motif-related utility functions.
}
\examples{
data(examplemotif)
examplemotif0 <- examplemotif
examplemotif0["pseudocount"] <- 0

#######################################################################
## add_gap
## Add gap information to a motif.
m <- create_motif()
# Add a gap of size 5-8 between positions 4 and 5:
m <- add_gap(m, gaploc = 4, mingap = 5, maxgap = 8)

#######################################################################
## average_ic
## Calculate the average information content for a list of motifs.
m <- create_motif()
average_ic(m, "fzt")

#######################################################################
## compare_columns
## Compare two numeric vectors using the metrics from compare_motifs()
compare_columns(c(0.5, 0.1, 0.1, 0.2), c(0.7, 0.1, 0.1, 0.1), "PCC")

#######################################################################
## consensus_to_ppm
## Do the opposite of get_consensus. Note that loss of information is
## inevitable. Generates a sequence matrix.
sapply(c("A", "G", "T", "B"), consensus_to_ppm)

#######################################################################
## consensus_to_ppmAA
## Do the opposite of get_consensusAA and generate a motif matrix.
sapply(c("V", "A", "L"), consensus_to_ppmAA)

#######################################################################
## get_consensus
## Get a consensus string from a DNA/RNA motif.
m <- create_motif()["motif"]
apply(m, 2, get_consensus)

#######################################################################
## get_consensusAA
## Get a consensus string from an amino acid motif. Unless each position
## is clearly dominated by a single amino acid, the resulting string will
## likely be useless.
m <- create_motif(alphabet = "AA")["motif"]
apply(m, 2, get_consensusAA, type = "PPM")

#######################################################################
## get_matches
## Get all possible motif matches above input score
get_matches(examplemotif, 0)
get_matches(examplemotif0, 0, allow.nonfinite = TRUE)

#######################################################################
## get_scores
## Get all possible scores for a motif
length(get_scores(examplemotif))
get_scores(examplemotif)
get_scores(examplemotif0, allow.nonfinite = TRUE)

#######################################################################
## icm_to_ppm
## Do the opposite of ppm_to_icm.
m <- create_motif(type = "ICM")["motif"]
apply(m, 2, icm_to_ppm)

#######################################################################
## motif_range
## Calculate the range of possible logodds scores for a motif
motif_range(examplemotif)
motif_range(examplemotif, allow.nonfinite = TRUE)

#######################################################################
## motif_score
## Calculate motif score from different thresholds
m <- normalize(examplemotif)
motif_score(m, c(0, 0.8, 1))
motif_score(examplemotif0, c(0, 0.8, 1), allow.nonfinite = TRUE,
   threshold.type = "fromzero")

#######################################################################
## log_string_pval
## Get the log of a string-formatted p-value
log_string_pval("1e-200")

#######################################################################
## pcm_to_ppm
## Go from a count type motif to a probability type motif.
m <- create_motif(type = "PCM", nsites = 50)["motif"]
apply(m, 2, pcm_to_ppm, pseudocount = 1)

#######################################################################
## position_icscore
## Similar to ppm_to_icm, except this calculates the position sum.
m <- create_motif()["motif"]
apply(m, 2, position_icscore, type = "PPM", bkg = rep(0.25, 4))

#######################################################################
## ppm_to_icm
## Convert one column from a probability type motif to an information
## content type motif.
m <- create_motif(nsites = 100, pseudocount = 0.8)["motif"]
apply(m, 2, ppm_to_icm, nsites = 100, bkg = rep(0.25, 4))

#######################################################################
## ppm_to_pcm
## Do the opposite of pcm_to_ppm.
m <- create_motif()["motif"]
apply(m, 2, ppm_to_pcm, nsites = 50)

#######################################################################
## ppm_to_pwm
## Go from a probability type motif to a weight type motif.
m <- create_motif()["motif"]
apply(m, 2, ppm_to_pwm, nsites = 100, bkg = rep(0.25, 4))

#######################################################################
## prob_match, prob_match_bkg
## Calculate probability of a particular match based on background
## frequencies
prob_match(examplemotif, "TATATAT")
## Since this motif has a uniform background, every match of the same
## length is equally probable
prob_match(examplemotif, "TATATAG")
m <- examplemotif
m["bkg"] <- c(0.3, 0.2, 0.2, 0.3)
prob_match(m, "TATATAT")
## The prob_match_bkg alternative allows you to simply pass along the
## background frequencies
prob_match_bkg(c(A=0.3, C=0.2, G=0.2, T=0.3), c("TATATAT", "TATATAG"))

#######################################################################
## pwm_to_ppm
## Do the opposite of ppm_to_pwm.
m <- create_motif(type = "PWM")["motif"]
apply(m, 2, pwm_to_ppm, bkg = rep(0.25, 4))

#######################################################################
## Note that not all type conversions can be done directly; for those
## type conversions which are unavailable, universalmotif just chains
## together others (e.g. from PCM -> ICM => pcm_to_ppm -> ppm_to_icm)

#######################################################################
## round_motif
## Round down letter scores to 0
m <- create_motif()
## Remove letters from positions which are less than 5\% of the total
## position:
round_motif(m, pct.tolerance = 0.05)

#######################################################################
## score_match
## Calculate score of a particular match
score_match(examplemotif, "TATATAT")
score_match(examplemotif, "TATATAG")
score_match(examplemotif0, "TATATAT", allow.nonfinite = TRUE)
score_match(examplemotif0, "TATATAG", allow.nonfinite = TRUE)

#######################################################################
## summarise_motifs
## Create a data.frame of information based on a list of motifs.
m1 <- create_motif()
m2 <- create_motif()
m3 <- create_motif()
summarise_motifs(list(m1, m2, m3))

#######################################################################
## ungap
## Unset motif's gap status. Does not delete actual gap data unless
## delete = TRUE.
m <- create_motif()
m <- add_gap(m, 3, 2, 4)
m <- ungap(m)
# Restore gap data:
m <- add_gap(m)

}
\seealso{
\code{\link[=create_motif]{create_motif()}}
}
\author{
Benjamin Jean-Marie Tremblay, \email{benjamin.tremblay@uwaterloo.ca}
}
