8 August 2025

linkSet 0.99.19

1 Introduction

This vignette provides a step-by-step guide to using the linkSet package to analyze Hi-C/HiChIP data. We will use a toy example to illustrate the main functions and workflows. Our goal is to identify the enhancer-gene links in this example.

We will use the following datasets as input:

  1. validPairs produced by HiC-Pro.
  2. Mouse embryo body enhancer data from enhancer atlas website.
  3. Gene annotation data from TxDb.Mmusculus.UCSC.mm10.knownGene package.

We highly recommend you to use custom data instead of the example data provided in this vignette.

2 Setup

if (!require("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
suppressPackageStartupMessages({
  library(linkSet)
  # Load additional packages only if available
  if (requireNamespace("TxDb.Mmusculus.UCSC.mm10.knownGene", quietly = TRUE)) {
    library(TxDb.Mmusculus.UCSC.mm10.knownGene)
  }
  if (requireNamespace("org.Mm.eg.db", quietly = TRUE)) {
    library(org.Mm.eg.db)
  }
  if (requireNamespace("Organism.dplyr", quietly = TRUE)) {
    library(Organism.dplyr)
  }
  if (requireNamespace("InteractionSet", quietly = TRUE)) {
    library(InteractionSet)
  }
  if (requireNamespace("rtracklayer", quietly = TRUE)) {
    library(rtracklayer)
  }
})

We use our custom function readvalidPairs to load the example data. Firstly, we need to load into GInteractions object.

hic_file <- system.file("extdata", "toyExample.pair.gz",
  package = "linkSet"
)
gi <- readvalidPairs(hic_file, format = "pair")
promoterGr <- tryCatch(
  {
    withTxDb("mm10", function(src) {
      genes <- Organism.dplyr::genes(src, columns = "symbol")
      IRanges::promoters(genes, upstream = 10000)
    })
  },
  error = function(e) {
    # Fallback approach using TxDb directly
    if (requireNamespace("TxDb.Mmusculus.UCSC.mm10.knownGene", quietly = TRUE) &&
      requireNamespace("org.Mm.eg.db", quietly = TRUE)) {
      txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene::TxDb.Mmusculus.UCSC.mm10.knownGene
      gene_ranges <- GenomicFeatures::genes(txdb)

      # Add gene symbols
      gene_ids <- names(gene_ranges)
      symbols <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db,
        keys = gene_ids,
        columns = "SYMBOL",
        keytype = "ENTREZID"
      )
      symbol_map <- setNames(symbols$SYMBOL, symbols$ENTREZID)
      mcols(gene_ranges)$symbol <- symbol_map[names(gene_ranges)]
      genes <- gene_ranges[!is.na(mcols(gene_ranges)$symbol)]

      IRanges::promoters(genes, upstream = 10000)
    } else {
      stop("Required packages not available for mm10 annotation")
    }
  }
)
file_path <- system.file("extdata", "Embryo_body.bed.gz", package = "linkSet")
enhancer <- rtracklayer::import(file_path, format = "BED")

Because the hic data only contains digist end, so we resize the region to upstream 5kb and downstream 5kb. After that, we use baitGInteractions to generate the linkSet object.

gi <- resize(gi, 10000, fix = "center")
ls <- baitGInteractions(gi, promoterGr, enhancer, geneSymbol = "symbol")
ls
#> linkSet object with 27 interactions and 2 metadata columns:
#>               bait     seqnames_oe         ranges_oe | anchor1.symbol
#>        <character>           <Rle>         <IRanges> |    <character>
#>    [1]        Xkr4 ---        chr1 51753961-51754690 |           Xkr4
#>    [2]        Xkr4 ---        chr1 38494581-38496180 |           Xkr4
#>    [3]        Xkr4 ---        chr1 38496241-38496840 |           Xkr4
#>    [4]         Rp1 ---        chr1 14496641-14497000 |            Rp1
#>    [5]       Sox17 ---        chr1 90401091-90402520 |          Sox17
#>    ...         ... ...         ...               ... .            ...
#>   [23]      Npbwr1 ---        chr1 59014721-59015250 |         Npbwr1
#>   [24]      Rb1cc1 ---        chr1 45369781-45370210 |         Rb1cc1
#>   [25]      Rb1cc1 ---        chr1 33869921-33870570 |         Rb1cc1
#>   [26]      Rb1cc1 ---        chr1 94074541-94075190 |         Rb1cc1
#>   [27]      Alkal1 ---        chr1 12683361-12683590 |         Alkal1
#>            anchor2.name
#>             <character>
#>    [1] 11.4716435093611
#>    [2] 11.9206847715326
#>    [3] 11.1575156050835
#>    [4] 16.1891332694206
#>    [5] 11.3810398990553
#>    ...              ...
#>   [23] 11.2298655414852
#>   [24] 16.4355294570208
#>   [25] 11.2829690359357
#>   [26] 10.9286062722993
#>   [27] 14.0682990384515
#>   -------
#>   regions: 30 ranges and 0 metadata columns
#>   seqinfo: 66 sequences (1 circular) from mm10 genome

When we print the linkSet object, we can see the basic information of the linkSet object. By default, we don’t show the bait region. But you are interested in the bait region, you can set showBaitRegion = TRUE.

showLinkSet(ls, baitRegion = TRUE)
#> linkSet object with 27 interactions and 2 metadata columns:
#>        bait seqnames_bait     ranges_bait     seqnames_oe         ranges_oe |
#>  [1]   Xkr4          chr1 3671299-3681498 ---        chr1 51753961-51754690 |
#>  [2]   Xkr4          chr1 3671299-3681498 ---        chr1 38494581-38496180 |
#>  [3]   Xkr4          chr1 3671299-3681498 ---        chr1 38496241-38496840 |
#>  [4]    Rp1          chr1 4409042-4419241 ---        chr1 14496641-14497000 |
#>  [5]  Sox17          chr1 4497155-4507354 ---        chr1 90401091-90402520 |
#>  ...    ...           ...             ... ...         ...               ... .
#> [23] Npbwr1          chr1 5917199-5927398 ---        chr1 59014721-59015250 |
#> [24] Rb1cc1          chr1 6196197-6206396 ---        chr1 45369781-45370210 |
#> [25] Rb1cc1          chr1 6196197-6206396 ---        chr1 33869921-33870570 |
#> [26] Rb1cc1          chr1 6196197-6206396 ---        chr1 94074541-94075190 |
#> [27] Alkal1          chr1 6349218-6359417 ---        chr1 12683361-12683590 |
#>      anchor1.symbol     anchor2.name
#>  [1]           Xkr4 11.4716435093611
#>  [2]           Xkr4 11.9206847715326
#>  [3]           Xkr4 11.1575156050835
#>  [4]            Rp1 16.1891332694206
#>  [5]          Sox17 11.3810398990553
#>  ...            ...              ...
#> [23]         Npbwr1 11.2298655414852
#> [24]         Rb1cc1 16.4355294570208
#> [25]         Rb1cc1 11.2829690359357
#> [26]         Rb1cc1 10.9286062722993
#> [27]         Alkal1 14.0682990384515

5 Session Information

sessionInfo()
#> R version 4.5.1 (2025-06-13)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.22-bioc/R/lib/libRblas.so 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/New_York
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] rtracklayer_1.69.1                       
#>  [2] InteractionSet_1.37.1                    
#>  [3] SummarizedExperiment_1.39.1              
#>  [4] MatrixGenerics_1.21.0                    
#>  [5] matrixStats_1.5.0                        
#>  [6] Organism.dplyr_1.37.1                    
#>  [7] AnnotationFilter_1.33.0                  
#>  [8] dplyr_1.1.4                              
#>  [9] org.Mm.eg.db_3.21.0                      
#> [10] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
#> [11] GenomicFeatures_1.61.6                   
#> [12] AnnotationDbi_1.71.1                     
#> [13] Biobase_2.69.0                           
#> [14] linkSet_0.99.19                          
#> [15] GenomicRanges_1.61.1                     
#> [16] Seqinfo_0.99.2                           
#> [17] IRanges_2.43.0                           
#> [18] S4Vectors_0.47.0                         
#> [19] BiocGenerics_0.55.1                      
#> [20] generics_0.1.4                           
#> [21] BiocStyle_2.37.1                         
#> 
#> loaded via a namespace (and not attached):
#>  [1] tidyselect_1.2.1         farver_2.1.2             blob_1.2.4              
#>  [4] filelock_1.0.3           Biostrings_2.77.2        bitops_1.0-9            
#>  [7] fastmap_1.2.0            lazyeval_0.2.2           RCurl_1.98-1.17         
#> [10] BiocFileCache_2.99.5     GenomicAlignments_1.45.2 XML_3.99-0.18           
#> [13] digest_0.6.37            lifecycle_1.0.4          KEGGREST_1.49.1         
#> [16] RSQLite_2.4.2            magrittr_2.0.3           compiler_4.5.1          
#> [19] rlang_1.1.6              sass_0.4.10              tools_4.5.1             
#> [22] yaml_2.3.10              data.table_1.17.8        knitr_1.50              
#> [25] labeling_0.4.3           S4Arrays_1.9.1           bit_4.6.0               
#> [28] curl_6.4.0               DelayedArray_0.35.2      RColorBrewer_1.1-3      
#> [31] abind_1.4-8              BiocParallel_1.43.4      withr_3.0.2             
#> [34] purrr_1.1.0              grid_4.5.1               ggplot2_3.5.2           
#> [37] scales_1.4.0             iterators_1.0.14         tinytex_0.57            
#> [40] dichromat_2.0-0.1        cli_3.6.5                rmarkdown_2.29          
#> [43] crayon_1.5.3             httr_1.4.7               rjson_0.2.23            
#> [46] DBI_1.2.3                cachem_1.1.0             parallel_4.5.1          
#> [49] BiocManager_1.30.26      XVector_0.49.0           restfulr_0.0.16         
#> [52] vctrs_0.6.5              Matrix_1.7-3             jsonlite_2.0.0          
#> [55] bookdown_0.43            patchwork_1.3.1          bit64_4.6.0-1           
#> [58] magick_2.8.7             foreach_1.5.2            jquerylib_0.1.4         
#> [61] glue_1.8.0               codetools_0.2-20         gtable_0.3.6            
#> [64] GenomeInfoDb_1.45.9      BiocIO_1.19.0            UCSC.utils_1.5.0        
#> [67] tibble_3.3.0             pillar_1.11.0            rappdirs_0.3.3          
#> [70] htmltools_0.5.8.1        R6_2.6.1                 dbplyr_2.5.0            
#> [73] httr2_1.2.1              evaluate_1.0.4           lattice_0.22-7          
#> [76] png_0.1-8                Rsamtools_2.25.2         memoise_2.0.1           
#> [79] bslib_0.9.0              Rcpp_1.1.0               SparseArray_1.9.1       
#> [82] xfun_0.52                pkgconfig_2.0.3