## ----style, echo = FALSE, results = 'asis'--------------------------------------------------------
## Widen printed output to 100 columns, then let the KNITR_EVAL and
## KNITR_CACHE environment variables (each defaulting to "TRUE" when unset)
## decide whether chunks are evaluated and cached.
options(width = 100)
knitr::opts_chunk$set(
    eval = as.logical(Sys.getenv("KNITR_EVAL", unset = "TRUE")),
    cache = as.logical(Sys.getenv("KNITR_CACHE", unset = "TRUE"))
)

## ----file.choose, eval=FALSE----------------------------------------------------------------------
#  path <- file.choose()

## ----system.file, echo=FALSE----------------------------------------------------------------------
## Look up BRFSS-subset.csv under 'extdata' in the BiocIntroRPCI package;
## system.file() yields "" when the file cannot be found.
path <- system.file("extdata", "BRFSS-subset.csv", package = "BiocIntroRPCI")

## ----read.csv-------------------------------------------------------------------------------------
## Read the BRFSS subset into a data.frame.
## Fail early when 'path' is not an existing file (the same guard the
## ALL-input chunk uses): system.file() returns "" when the resource is
## missing, and read.csv() cannot report that in a meaningful way.
stopifnot(file.exists(path))
brfss <- read.csv(path)

## ----brfss-sex------------------------------------------------------------------------------------
## Number of rows for each value of Sex.
table(brfss[["Sex"]])

## ----brfss-xtabs----------------------------------------------------------------------------------
## Row counts cross-classified by Year and Sex.
xtabs(~ Year + Sex, data = brfss)

## ----brfss-aggregate------------------------------------------------------------------------------
## Mean Weight within each Year x Sex combination.
aggregate(Weight ~ Year + Sex, data = brfss, FUN = mean)

## ----t-test-1990----------------------------------------------------------------------------------
## Keep the rows recorded for Year 1990, then compare Weight between the
## levels of Sex with a two-sample t-test (t.test() defaults to the Welch
## variant, i.e. unequal variances).
brfss_1990 <- brfss[brfss$Year == 1990, ]
t.test(Weight ~ Sex, data = brfss_1990)

## ----brfss-boxplot, fig.width=5, fig.height=5-----------------------------------------------------
## One box of Weight per Year, using only the rows where Sex is "Male".
boxplot(Weight ~ Year, data = brfss, subset = Sex == "Male", main = "Males")

## ----brfss-hist, fig.width=5, fig.height=5--------------------------------------------------------
## Distribution of Weight among the "Female" rows of the 1990 subset.
hist(brfss_1990$Weight[brfss_1990$Sex == "Female"],
     main = "Females, 1990", xlab = "Weight")

## ----echo=FALSE-----------------------------------------------------------------------------------
## Look up ALL-phenoData.csv under 'extdata' in the BiocIntroRPCI package;
## system.file() yields "" when the file cannot be found.
path <- system.file("extdata", "ALL-phenoData.csv", package = "BiocIntroRPCI")

## ----ALL-choose, eval=FALSE-----------------------------------------------------------------------
#  path <- file.choose()    # look for ALL-phenoData.csv

## ----ALL-input------------------------------------------------------------------------------------
## Read the ALL phenotype table, stopping early if 'path' is not a file.
## stringsAsFactors = TRUE is spelled out because read.csv() no longer
## converts strings to factors by default since R 4.0.0, while later chunks
## call summary(), droplevels(), levels() and `levels<-` on these columns
## and rely on them being factors.
stopifnot(file.exists(path))
pdata <- read.csv(path, stringsAsFactors = TRUE)

## ----ALL-properties-------------------------------------------------------------------------------
## First look at the table: its class, column names, dimensions and leading
## rows, followed by summaries of the 'sex' and 'cyto.normal' columns.
class(pdata)
names(pdata)
dim(pdata)
head(pdata, n = 6L)
summary(pdata$sex)
summary(pdata$cyto.normal)

## ----ALL-subset-----------------------------------------------------------------------------------
## Subsetting examples: `[rows, columns]` with numeric indices, `$` to pull
## out a single column, and a logical condition to select rows.
pdata[seq_len(5), c(3, 4)]
pdata[seq_len(5), ]
head(pdata[, 3:5])
tail(pdata[, 3:5], n = 3)
head(pdata$age)
head(pdata$sex)
head(pdata[pdata$age > 21, ])

## ----ALL-subset-NA--------------------------------------------------------------------------------
## Logical index of rows with sex "F" and age above 40. An element is NA
## when the condition cannot be decided, e.g. an "F" row with a missing age.
idx <- with(pdata, sex == "F" & age > 40)
table(idx, useNA = "ifany")
dim(pdata[idx, ])            # WARNING: `[` yields an all-NA row per NA in idx
tail(pdata[idx, ])
dim(subset(pdata, idx))      # BETTER: subset() treats NA as FALSE, no NA rows
tail(subset(pdata, idx))

## work-around for `[`: turn the NA entries of idx into FALSE
idx <- replace(idx, is.na(idx), FALSE)
dim(pdata[idx, ])

## ----ALL-BCR/ABL-subset---------------------------------------------------------------------------
## Keep the rows whose mol.biol is either "BCR/ABL" or "NEG".
bcrabl <- subset(pdata, mol.biol %in% c("BCR/ABL", "NEG"))

## ----ALL-BCR/ABL-drop-unused----------------------------------------------------------------------
## Discard the mol.biol levels that no longer occur after the subset.
## NOTE: droplevels() has no method for character vectors, so this step
## assumes mol.biol is a factor.
bcrabl[["mol.biol"]] <- droplevels(bcrabl[["mol.biol"]])

## ----ALL-BT---------------------------------------------------------------------------------------
## Levels of BT before recoding.
levels(bcrabl[["BT"]])

## ----ALL-BT-recode--------------------------------------------------------------------------------
## Shorten every BT level to its first character; levels that end up with
## the same label are merged. The table is printed before and after.
table(bcrabl$BT)
levels(bcrabl$BT) <- substr(levels(bcrabl$BT), start = 1, stop = 1)
table(bcrabl$BT)

## ----ALL-BCR/ABL-BT-------------------------------------------------------------------------------
## Row counts cross-classified by BT and mol.biol.
xtabs(~ BT + mol.biol, data = bcrabl)

## ----ALL-aggregate--------------------------------------------------------------------------------
## Mean age within each mol.biol x sex combination.
aggregate(age ~ mol.biol + sex, data = bcrabl, FUN = mean)

## ----ALL-age--------------------------------------------------------------------------------------
## Compare age between the mol.biol groups kept in bcrabl: first a
## two-sample t-test, then one box per group.
t.test(age ~ mol.biol, data = bcrabl)
boxplot(age ~ mol.biol, data = bcrabl)

