## Loading required package: kinship2
## Loading required package: Matrix
## Loading required package: quadprog
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## 
## Attaching package: 'FamAgg'
## The following object is masked from 'package:igraph':
## 
##     cliques
## The following object is masked from 'package:kinship2':
## 
##     pedigree

Package: FamAgg
Authors: J. Rainer, D. Taliun, C.X. Weichenberger
Modified: 2024-10-23 21:20:05.329003
Compiled: Tue Oct 29 17:20:47 2024

1 Introduction

This package provides basic pedigree analysis and plotting utilities as well as a variety of methods to evaluate familial clustering of cases from a given trait. Identification of families or groups of individuals within families with significant aggregation of cases can also aid in the selection of interesting and promising individuals for whole genome or exome sequencing projects.

For kinship coefficient calculations and pedigree plotting the package relies on and extends the functionality of the kinship2 package [1].

If you use this package please cite Rainer et al. [2].

2 Basic pedigree operations

In the examples below we perform some simple pedigree operations, such as plotting the pedigree for an individual or family, finding the closest common ancestor for a set of individuals in a pedigree or retrieving the identifiers (IDs) of all ancestors for an individual. Basic pedigree information is stored in FAData objects, thus we first generate such an object from a subset of the Minnesota Breast Cancer Study provided by the kinship2 package. In the example below, we generate the FAData object by providing a data.frame with the pedigree data; alternatively, the pedigree information could be imported from a file (see Section 3). Upon data set creation the kinship matrix (i.e. a matrix containing the kinship coefficient between each pair of individuals in the whole pedigree) is internally calculated using the functionality from the kinship2 package [1].

library(FamAgg)

## The Minnesota Breast Cancer Study data set is provided by kinship2.
data(minnbreast)
## Keep only a few families (family IDs 4 to 14) of the whole data set.
in_subset <- minnbreast$famid %in% 4:14
mbsub <- minnbreast[in_subset, ]
## Map the new pedigree column names (names) to the original ones (values).
ped_cols <- c(
    family = "famid",
    id = "id",
    father = "fatherid",
    mother = "motherid",
    sex = "sex"
)
## Select the pedigree columns, then rename them.
mbped <- mbsub[, unname(ped_cols)]
colnames(mbped) <- names(ped_cols)
## Optional argument age: the end ages, named by the individuals' IDs.
endage <- setNames(mbsub$endage, mbsub$id)
## Create the FAData object from the pedigree and the ages.
fad <- FAData(pedigree = mbped, age = endage)

We can access all the pedigree information stored in this object using the pedigree method, but also using $. The row names of the pedigree data.frame as well as the names of the vectors returned by $ are the IDs of the individuals in the pedigree.

## Use the pedigree method to access the full pedigree
## data.frame,
head(pedigree(fad))
##   family id father mother sex
## 1      4  1     NA     NA   M
## 2      4  2     NA     NA   F
## 3      4  3     25      4   F
## 4      4  4      1      2   F
## 5      4  5      1      2   M
## 6      4  6      1      2   M
## or access individual columns using $.
## The ID of the father (NA representing "founders"):
head(fad$father)
##  1  2  3  4  5  6 
## NA NA 25  1  1  1
## Mother:
head(fad$mother)
##  1  2  3  4  5  6 
## NA NA  4  2  2  2
## Sex:
head(fad$sex)
## 1 2 3 4 5 6 
## M F F F M M 
## Levels: M F
## We can also access the age of each individual, if
## provided.
head(age(fad))
##        1        2        3        4        5        6 
##       NA 78.05886 55.50000 48.00000 75.00342 53.63997

To extract the pedigree for a single family we can use the family method, specifying either the ID of the family or the ID of an individual in the family.

## Extract the pedigree information from family "4": first the number
## of rows, then the first few rows...
nrow(family(fad, family = 4))
## [1] 43
head(family(fad, family = 4))
##   family id father mother sex
## 1      4  1     NA     NA   M
## 2      4  2     NA     NA   F
## 3      4  3     25      4   F
## 4      4  4      1      2   F
## 5      4  5      1      2   M
## 6      4  6      1      2   M
## ...which is the same as extracting the family pedigree
## for an individual of this family.
head(family(fad, id = 3))
##   family id father mother sex
## 1      4  1     NA     NA   M
## 2      4  2     NA     NA   F
## 3      4  3     25      4   F
## 4      4  4      1      2   F
## 5      4  5      1      2   M
## 6      4  6      1      2   M
## Note that IDs are always converted to character internally;
## thus, id = 3 and id = "3" return the same information.
head(family(fad, id = "3"))
##   family id father mother sex
## 1      4  1     NA     NA   M
## 2      4  2     NA     NA   F
## 3      4  3     25      4   F
## 4      4  4      1      2   F
## 5      4  5      1      2   M
## 6      4  6      1      2   M

Alternatively, we could subset the FAData to individuals of a single family.

## Subset the object to a single family (family "4").
fam4 <- fad[fad$family == "4", ]
## Tabulate the family IDs remaining in the subset: only family 4
## is left.
table(fam4$family)
## 
##  4 
## 43

To explore this family we can plot its pedigree. By default, the plotting capabilities of the kinship2 package are used to plot pedigrees, but alternatively, if all required dependencies are available, the HaploPainter [3] perl script (http://haplopainter.sourceforge.net/) can be used instead. The switchPlotfun function can be used to switch the plotting back-end. Available arguments are ks2paint and haplopaint for kinship2 and HaploPainter plotting, respectively. Note, however, that HaploPainter can only export plots to a file, while kinship2 plotting can, in addition to exporting the plot, also show it as a standard R plot.

Below we use switchPlotfun to ensure the use of kinship2 plotting (usually not required) and plot the full available pedigree of individual 3. If the age of individuals is available, it will be plotted below the individual’s ID.

## Select the kinship2 plotting back-end ("ks2paint").
switchPlotfun("ks2paint")
## By supplying device="plot", we specify that we wish to visualize the
## pedigree in an R plot. This is the default for "ks2paint", anyway.
plotPed(fad, id = 3, device = "plot")

The pedigree for an individual or a list of individuals can be extracted using the buildPed method. By default the method first tries to identify all parents up to 3 generations in the pedigree, and subsequently all children of the individuals and all identified parents.

## Build the pedigree for individual 3.
fullPed <- buildPed(fad, id = "3")
## Number of rows in the extracted pedigree.
nrow(fullPed)
## [1] 29

Alternatively, we can extract the smallest possible pedigree for a list of individuals by specifying prune=TRUE. Internally, the function transforms the pedigree into a graph, tries to find all paths between the individuals and returns the sub-graph containing these individuals together with all individuals on the paths between them.

## Find the subpedigree for individuals 21, 22 and 17;
## prune = TRUE requests the smallest possible pedigree for them.
buildPed(fad, id = c(21, 22, 17), prune = TRUE)
##    family id father mother sex
## 3       4  3     25      4   F
## 4       4  4      1      2   F
## 1       4  1     NA     NA   M
## 8       4  8      1      2   F
## 17      4 17     28      8   M
## 21      4 21     24      3   M
## 22      4 22     24      3   F
## 2       4  2     NA     NA   F
## 25      4 25     NA     NA   M
## 28      4 28     NA     NA   M
## 24      4 24     NA     NA   M

And the pedigree plot for that subset of the whole family:

## Plot the pruned pedigree for individuals 21, 22 and 17.
plotPed(fad, id = c(21, 22, 17), prune = TRUE)