% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/extract_matrix.R
\name{extract_matrix}
\alias{extract_matrix}
\title{Extract a matrix from a CellTypeDataset}
\usage{
extract_matrix(
  ctd,
  dataset,
  level = 1,
  input_species = NULL,
  output_species = "human",
  metric = "specificity",
  non121_strategy = "drop_both_species",
  method = "homologene",
  numberOfBins = 40,
  remove_unlabeled_clusters = FALSE,
  force_new_quantiles = FALSE,
  as_sparse = TRUE,
  as_DelayedArray = FALSE,
  rename_columns = TRUE,
  make_columns_unique = FALSE,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{ctd}{Input CellTypeData.}

\item{dataset}{CellTypeData name.}

\item{level}{CTD level to extract from.}

\item{input_species}{Which species the gene names in \code{exp} come from.
See \link[EWCE]{list_species} for all available species.}

\item{output_species}{Which species' gene names to convert \code{exp} to.
See \link[EWCE]{list_species} for all available species.}

\item{metric}{Name of the matrix to extract.}

\item{non121_strategy}{How to handle genes that don't have
1:1 mappings between \code{input_species}:\code{output_species}.
Options include:\cr
\describe{
\item{\code{"drop_both_species" or "dbs" or 1}}{
Drop genes that have duplicate
mappings in either the \code{input_species} or \code{output_species}
(\emph{DEFAULT}).}
\item{\code{"drop_input_species" or "dis" or 2}}{
Only drop genes that have duplicate
mappings in the \code{input_species}.}
\item{\code{"drop_output_species" or "dos" or 3}}{
Only drop genes that have duplicate
mappings in the \code{output_species}.}
\item{\code{"keep_both_species" or "kbs" or 4}}{
Keep all genes regardless of whether
they have duplicate mappings in either species.}
\item{\code{"keep_popular" or "kp" or 5}}{
Return only the most "popular" interspecies ortholog mappings.
 This procedure tends to yield a greater number of returned genes
 but at the cost of many of them not being true biological 1:1 orthologs.}
 \item{\code{"sum","mean","median","min" or "max"}}{
 When \code{gene_df} is a matrix and \code{gene_output="rownames"},
  these options will aggregate many-to-one gene mappings
  (\code{input_species}-to-\code{output_species})
  after dropping any duplicate genes in the \code{output_species}.
 }
}}

\item{method}{R package to use for gene mapping:
\describe{
 \item{\code{"gprofiler"}}{Slower but more species and genes.}
 \item{\code{"homologene"}}{Faster but fewer species and genes.}
 \item{\code{"babelgene"}}{Faster but fewer species and genes.
 Also gives consensus scores for each gene mapping based on
 several different data sources.}
}}

\item{numberOfBins}{Number of non-zero quantile bins.}

\item{remove_unlabeled_clusters}{Remove any samples that have
numeric column names.}

\item{force_new_quantiles}{By default, quantile computation is
skipped if quantiles have already been computed.
Set \code{=TRUE} to override this and generate new quantiles.}

\item{as_sparse}{Convert to sparse matrix.}

\item{as_DelayedArray}{Convert to \code{DelayedArray}.}

\item{rename_columns}{Remove \code{replace_chars} from column names.}

\item{make_columns_unique}{Rename each column with the prefix
\code{dataset.species.celltype}.}

\item{verbose}{Print messages.
Set \code{verbose=2} if you want to print all messages
 from internal functions as well.}

\item{...}{
  Arguments passed on to \code{\link[orthogene:convert_orthologs]{orthogene::convert_orthologs}}
  \describe{
    \item{\code{gene_df}}{Data object containing the genes
(see \code{gene_input} for options on how
the genes can be stored within the object).\cr
Can be one of the following formats:\cr
\describe{
\item{\code{matrix}}{A sparse or dense matrix.}
\item{\code{data.frame}}{A \code{data.frame},
 \code{data.table}, or \code{tibble}.}
\item{\code{list}}{A \code{list} or character \code{vector}.}
}
Genes, transcripts, proteins, SNPs, or genomic ranges
 can be provided in any format
(HGNC, Ensembl, RefSeq, UniProt, etc.) and will be
automatically converted to gene symbols unless
specified otherwise with the \code{...} arguments.\cr
\emph{Note}: If you set \code{method="homologene"}, you
must either supply genes in gene symbol format (e.g. "Sox2")
 OR set \code{standardise_genes=TRUE}.}
    \item{\code{gene_input}}{Which aspect of \code{gene_df} to
get gene names from:\cr
\describe{
\item{\code{"rownames"}}{From row names of data.frame/matrix.}
\item{\code{"colnames"}}{From column names of data.frame/matrix.}
\item{\code{<column name>}}{From a column in \code{gene_df},
 e.g. \code{"gene_names"}.}
}}
    \item{\code{gene_output}}{How to return genes.
Options include:\cr
\describe{
\item{\code{"rownames"}}{As row names of \code{gene_df}.}
\item{\code{"colnames"}}{As column names of \code{gene_df}.}
\item{\code{"columns"}}{As new columns "input_gene", "ortholog_gene"
(and "input_gene_standard" if \code{standardise_genes=TRUE})
in \code{gene_df}.}
\item{\code{"dict"}}{As a dictionary (named list) where the names
are input_gene and the values are ortholog_gene.}
\item{\code{"dict_rev"}}{As a reversed dictionary (named list)
where the names are ortholog_gene and the values are input_gene.}
}}
    \item{\code{standardise_genes}}{If \code{TRUE} AND
\code{gene_output="columns"}, a new column "input_gene_standard"
will be added to \code{gene_df} containing standardised HGNC symbols
identified by \link[gprofiler2]{gorth}.}
    \item{\code{drop_nonorths}}{Drop genes that don't have an ortholog
in the \code{output_species}.}
    \item{\code{agg_fun}}{Aggregation function passed to 
 \link[orthogene]{aggregate_mapped_genes}. 
Set to \code{NULL} to skip aggregation step (default).}
    \item{\code{mthreshold}}{Maximum number of ortholog names per gene to show.
Passed to \link[gprofiler2]{gorth}.
Only used when \code{method="gprofiler"} (\emph{DEFAULT : }\code{Inf}).}
    \item{\code{sort_rows}}{Sort \code{gene_df} rows alphanumerically.}
    \item{\code{gene_map}}{A \link[base]{data.frame} that maps the current gene names
to new gene names. 
This function's behaviour will adapt to different situations as follows: 
\describe{
\item{\code{gene_map=<data.frame>}}{When a data.frame containing the
gene key:value columns 
(specified by \code{input_col} and \code{output_col}, respectively)
is provided, this will be used to perform aggregation/expansion.}
\item{\code{gene_map=NULL} and \code{input_species!=output_species}}{
A \code{gene_map} is automatically generated by
 \link[orthogene]{map_orthologs} to perform inter-species 
 gene aggregation/expansion.}
\item{\code{gene_map=NULL} and \code{input_species==output_species}}{
A \code{gene_map} is automatically generated by
 \link[orthogene]{map_genes} to perform within-species 
 gene symbol standardization and aggregation/expansion.}
}}
    \item{\code{input_col}}{Column name within \code{gene_map} with gene names matching 
the row names of \code{X}.}
    \item{\code{output_col}}{Column name within \code{gene_map} with gene names
that you wish to map the row names of \code{X} onto.}
  }}
}
\value{
(specificity) matrix.
}
\description{
Extracts a particular matrix (e.g., mean_exp, specificity)
from a CellTypeDataset object.
}
\keyword{internal}
