% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bootstrap_enrichment_test.R
\name{bootstrap_enrichment_test}
\alias{bootstrap_enrichment_test}
\title{Bootstrap cell type enrichment test}
\usage{
bootstrap_enrichment_test(
  sct_data = NULL,
  hits = NULL,
  bg = NULL,
  genelistSpecies = NULL,
  sctSpecies = NULL,
  sctSpecies_origin = sctSpecies,
  output_species = "human",
  method = "homologene",
  reps = 100,
  no_cores = 1,
  annotLevel = 1,
  geneSizeControl = FALSE,
  controlledCT = NULL,
  mtc_method = "BH",
  sort_results = TRUE,
  standardise_sct_data = TRUE,
  standardise_hits = FALSE,
  verbose = TRUE,
  localHub = FALSE,
  store_gene_data = TRUE
)
}
\arguments{
\item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.}

\item{hits}{List of gene symbols containing the target gene list.
Will automatically be converted to human gene symbols
if \code{geneSizeControl=TRUE}.}

\item{bg}{List of gene symbols containing the background gene list
(including hit genes). If \code{bg=NULL},
 an appropriate gene background will be created automatically.}

\item{genelistSpecies}{Species that \code{hits} genes came from
(no longer limited to just "mouse" and "human").
See \link[EWCE]{list_species} for all available species.}

\item{sctSpecies}{Species that \code{sct_data} is currently formatted as
(no longer limited to just "mouse" and "human").
See \link[EWCE]{list_species} for all available species.}

\item{sctSpecies_origin}{Species that the \code{sct_data} 
originally came from, regardless of its current gene format 
(e.g. it was previously converted from mouse to human gene orthologs).
 This is used for computing an appropriate background.}

\item{output_species}{Species to convert \code{sct_data} and \code{hits} to
(Default: "human").
See \link[EWCE]{list_species} for all available species.}

\item{method}{R package to use for gene mapping:
\describe{
 \item{\code{"gprofiler"}}{Slower but more species and genes.}
 \item{\code{"homologene"}}{Faster but fewer species and genes.}
 \item{\code{"babelgene"}}{Faster but fewer species and genes.
 Also gives consensus scores for each gene mapping based on
 several different data sources.}
}}

\item{reps}{Number of random gene lists to generate (\emph{Default: 100},
but should be >=10,000 for publication-quality results).}

\item{no_cores}{Number of cores to parallelise
bootstrapping \code{reps} over.}

\item{annotLevel}{An integer indicating which level of \code{sct_data} to
analyse (\emph{Default: 1}).}

\item{geneSizeControl}{Whether you want to control for
GC content and transcript length. Recommended if the gene list originates
from genetic studies (\emph{Default: FALSE}).
If set to \code{TRUE}, then \code{hits} must be from humans.}

\item{controlledCT}{[Optional] If not NULL, and instead is the name of a
cell type, then the bootstrapping controls for expression within that
cell type.}

\item{mtc_method}{Multiple-testing correction method
(passed to \link[stats]{p.adjust}).}

\item{sort_results}{Sort enrichment results from
smallest to largest p-values.}

\item{standardise_sct_data}{Should \code{sct_data} be standardised? 
If \code{TRUE}:
\itemize{
\item{When \code{sctSpecies!=output_species}
 the \code{sct_data} will be checked for object formatting and 
 the genes will be converted to the orthologs of the \code{output_species} 
 with \link[EWCE]{standardise_ctd}
  (which calls \link[orthogene]{map_genes} internally).
}
\item{When \code{sctSpecies==output_species},
 the \code{sct_data} will be checked for object formatting 
 with  \link[EWCE]{standardise_ctd}, but the gene names 
 will remain untouched.
 }
}}

\item{standardise_hits}{Should \code{hits} be standardised? 
If \code{TRUE}:
\itemize{
\item{When \code{genelistSpecies!=output_species}, 
 the genes will be converted to the orthologs of the \code{output_species}
 with \link[orthogene]{convert_orthologs}.
 }
\item{When \code{genelistSpecies==output_species}, 
 the genes will be standardised with \link[orthogene]{map_genes}.
 }
} 
If \code{FALSE}, \code{hits} will be passed on to subsequent steps as-is.}

\item{verbose}{Print messages.}

\item{localHub}{If working offline, set \code{localHub=TRUE} to work
with a local, non-updated hub. It will only have resources available that
have previously been downloaded. If offline, please also see the BiocManager
vignette section on offline use to ensure proper functionality.}

\item{store_gene_data}{Store sampled gene data for every bootstrap iteration.
When the number of bootstrap \code{reps} is very high (>=100k) and/or
 the number of genes in \code{hits} is very high, you may want
 to set \code{store_gene_data=FALSE} to avoid using excessive amounts of 
 memory (RAM).}
}
\value{
A list containing the following elements:
\itemize{
  \item \code{hit.cells}: vector containing the summed proportion of
  expression in each cell type for the target list.
  \item \code{gene_data}: data.table showing the number of times each gene
   appeared in the bootstrap sample.
  \item \code{bootstrap_data}: matrix in which each row represents the
  summed proportion of expression in each cell type for one of the
  random lists
  \item \code{controlledCT}: the controlled cell type (if applicable)
}
}
\description{
\code{bootstrap_enrichment_test} takes a genelist and a single cell type
transcriptome dataset and determines the probability of enrichment and fold
changes for each cell type.
}
\examples{
# Load the single cell data
sct_data <- ewceData::ctd()
# Set the parameters for the analysis
# Use 3 bootstrap lists for speed; for publishable analysis, use >=10,000
reps <- 3
# Load gene list from Alzheimer's disease GWAS
hits <- ewceData::example_genelist()

# Bootstrap significance test, no control for transcript length or GC content
full_results <- EWCE::bootstrap_enrichment_test(
    sct_data = sct_data,
    hits = hits,
    reps = reps,
    annotLevel = 1,
    sctSpecies = "mouse",
    genelistSpecies = "human")
}
