% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gdsWrapper_internal.R
\encoding{UTF-8}
\name{generateGDS1KGgenotypeFromSNPPileup}
\alias{generateGDS1KGgenotypeFromSNPPileup}
\title{Append the genotype information from a profile into the associated
Profile GDS File}
\usage{
generateGDS1KGgenotypeFromSNPPileup(
  pathGeno,
  listSamples,
  listPos,
  offset,
  minCov = 10,
  minProb = 0.999,
  seqError = 0.001,
  dfPedProfile,
  batch,
  studyDF,
  pathProfileGDS,
  genoSource,
  verbose
)
}
\arguments{
\item{pathGeno}{a \code{character} string representing the path to the
directory containing the VCF output of SNP-pileup for each sample. The
SNP-pileup files must be compressed (gz files) and have the name identifiers
of the samples. A sample with "Name.ID" identifier would have an
associated file called:
"Name.ID.vcf.gz" if genoSource is "VCF",
"Name.ID.generic.txt.gz" if genoSource is "generic", or
"Name.ID.txt.gz" if genoSource is "snp-pileup".}

\item{listSamples}{a \code{vector} of \code{character} string corresponding
to the sample identifiers that will have a Profile GDS file created. The
sample identifiers must be present in the "Name.ID" column of the
\code{data.frame} passed to the \code{dfPedProfile} parameter.}

\item{listPos}{a \code{data.frame} containing 2 columns. The first column,
called "snp.chromosome" contains the name of the chromosome where the
SNV is located. The second column, called "snp.position" contains the
position of the SNV on the chromosome.}

\item{offset}{an \code{integer} to adjust for whether the genome starts at 0 or 1.}

\item{minCov}{a single positive \code{integer} representing the minimum
coverage needed to keep the SNVs in the analysis. Default: \code{10}.}

\item{minProb}{a single positive \code{numeric} between 0 and 1
representing the probability that the base change at the SNV position
is not an error.
Default: \code{0.999}.}

\item{seqError}{a single positive \code{numeric} between 0 and 1
representing the sequencing error rate. Default: \code{0.001}.}

\item{dfPedProfile}{a \code{data.frame} with the information about
the sample(s).
The mandatory columns are: "Name.ID",
"Case.ID", "Sample.Type", "Diagnosis" and "Source". All columns must be in
\code{character} strings format. The \code{data.frame}
must contain the information for all the samples passed in the
\code{listSamples} parameter.}

\item{batch}{a single positive \code{integer} representing the current
identifier for the batch. Beware, this field is not stored anymore.}

\item{studyDF}{a \code{data.frame} containing the information about the
study associated to the analysed sample(s). The \code{data.frame} must have
those 3 columns: "study.id", "study.desc", "study.platform". All columns
must be in \code{character} strings.}

\item{pathProfileGDS}{a \code{character} string representing the path to
the directory where the Profile GDS files will be created.}

\item{genoSource}{a \code{character} string with three possible values:
'snp-pileup', 'generic' or 'VCF'. It specifies if the genotype files
are generated by snp-pileup (Facets) or are a generic format CSV file
with at least those columns:
'Chromosome', 'Position', 'Ref', 'Alt', 'Count', 'File1R' and 'File1A'.
The 'Count' is the depth at the specified position;
'File1R' is the depth of the reference allele and
'File1A' is the depth of the specific alternative allele.
Finally the file can be a VCF file with at least those genotype
fields: GT, AD, DP.}

\item{verbose}{a \code{logical} indicating if the function must print
messages when running.}
}
\value{
The function returns \code{0L} when successful.
}
\description{
This function appends the genotype information from a specific
profile into the Profile GDS file. The genotype information is extracted
from a SNV file as generated by SNP-pileup or other tools.
}
\examples{

## Use the session temporary directory as the working directory
workDir <- file.path(tempdir())

## Copy the demo genotype file shipped with the package into the
## working directory
demoFile <- file.path(system.file("extdata/tests", package="RAIDS"),
                        "ex1.txt.gz")
file.copy(from=demoFile, to=workDir)

## Study description given to the function
## It holds the 3 mandatory columns: "study.id", "study.desc",
## "study.platform"; values are strings, not factors
studyInfo <- data.frame(study.id="MYDATA", study.desc="Description",
                        study.platform="PLATFORM", stringsAsFactors=FALSE)

## Sample description, one row per sample, indexed by sample identifier
## Values are strings, not factors (stringsAsFactors=FALSE)
sampleIDs <- c("ex1", "ex2")
pedInfo <- data.frame(Name.ID=sampleIDs,
                        Case.ID=c("Patient_h11", "Patient_h12"),
                        Diagnosis=rep("Cancer", 2),
                        Sample.Type=rep("Primary Tumor", 2),
                        Source=rep("Databank B", 2),
                        stringsAsFactors=FALSE)
rownames(pedInfo) <- pedInfo[["Name.ID"]]

## SNV positions, all located on chromosome 1
snvPositions <- c(3467333, 3467428, 3469375, 3469387, 3469502, 3469527,
                    3469737, 3471497, 3471565, 3471618)
positionDF <- data.frame(snp.chromosome=rep(1, length(snvPositions)),
                            snp.position=snvPositions)

## Append the genotype information of sample "ex1" to its Profile GDS file
status <- RAIDS:::generateGDS1KGgenotypeFromSNPPileup(
            pathGeno=workDir, listSamples="ex1", listPos=positionDF,
            offset=-1, minCov=10, minProb=0.999, seqError=0.001,
            dfPedProfile=pedInfo, batch=1, studyDF=studyInfo,
            pathProfileGDS=workDir, genoSource="snp-pileup",
            verbose=FALSE)

## The function returns 0L when successful
status

## The Profile GDS file 'ex1.gds' has been created in the
## working directory
list.files(workDir)

## Remove the files created for this demo
unlink(file.path(workDir, c("ex1.gds", "ex1.txt.gz")))


}
\author{
Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
}
\keyword{internal}
