# Debugging

Our bug: c-crash.R

    ## See http://stackoverflow.com/questions/15816532/
    ## C crash in R-2.15.3, fixed in R-3.0.*
    x <- structure(4:6, .Label = LETTERS[1:3], class="factor")
    write.table(x, tempfile())

Preparation
- compile with flags -ggdb -O0


Inspect write.table: last line enters C code

    .Internal(write.table(x, file, nrow(x), p, rnames, sep, eol, 
              na, dec, as.integer(quote), qmethod != "double"))


Reproduce:

    $ ~/bin/R-2-15-branch/bin/R -f c-crash.R

Run under 'gdb' debugger

    $ ~/bin/R-2-15-branch/bin/R -d gdb -f c-crash.R
    (gdb) run

leads to

    > x <- structure(4:6, .Label = LETTERS[1:3], class="factor")
    > write.table(x, tempfile())
    
    Program received signal SIGSEGV, Segmentation fault.
    0x00007ffff7922920 in Rf_getCharCE (x=0x60000010)
        at /home/mtmorgan/src/R-2-15-branch/src/main/sysutils.c:724
    724        if(TYPEOF(x) != CHARSXP)
    (gdb)

Show listing (list, or just l) in vicinity of crash

    (gdb) l
    719        return ans;
    720    }
    721    
    722    cetype_t getCharCE(SEXP x)
    723    {
    724        if(TYPEOF(x) != CHARSXP)
    725            error(_("'%s' must be called on a CHARSXP"), "getCharCE");
    726        if(IS_UTF8(x)) return CE_UTF8;
    727        else if(IS_LATIN1(x)) return CE_LATIN1;
    728        else if(IS_BYTES(x)) return CE_BYTES;

Look at the call stack (backtrace, bt)

    (gdb) bt
    #0  0x00007ffff7922920 in Rf_getCharCE (x=0x60000010)
        at /home/mtmorgan/src/R-2-15-branch/src/main/sysutils.c:724
    #1  0x00007ffff7920ae7 in Rf_translateChar (x=0x60000010)
        at /home/mtmorgan/src/R-2-15-branch/src/main/sysutils.c:784
    #2  0x00007ffff78f6789 in EncodeElement2 (x=0x193a440, indx=4, quote=TRUE, 
        qmethod=TRUE, buff=0x7fffffffc2f0, cdec=46 '.')
        at /home/mtmorgan/src/R-2-15-branch/src/main/scan.c:1658
    #3  0x00007ffff78f633d in do_writetable (call=0xdadba8, op=0x637a58, 
        args=0xd3e190, rho=0xdacd08)
        at /home/mtmorgan/src/R-2-15-branch/src/main/scan.c:1789
    #4  0x00007ffff787b762 in do_internal (call=0xdadb00, op=0x6113b0, 
        args=0xd3f0a0, env=0xdacd08)
        
Move up call stack 2, show listing

    (gdb) up 2
    #2  0x00007ffff78f6789 in EncodeElement2 (x=0x193a440, indx=4, quote=TRUE, 
        qmethod=TRUE, buff=0x7fffffffc2f0, cdec=46 '.')
        at /home/mtmorgan/src/R-2-15-branch/src/main/scan.c:1658
    1658        p0 = translateChar(STRING_ELT(x, indx));
    (gdb) l
    1653        int nbuf;
    1654        char *q;
    1655        const char *p, *p0;
    1656    
    1657        if(TYPEOF(x) == STRSXP) {
    1658            p0 = translateChar(STRING_ELT(x, indx));
    1659        if(!quote) return p0;
    1660        for(nbuf = 2, p = p0; *p; p++) /* find buffer length needed */
    1661            nbuf += (*p == '"') ? 2 : 1;
    1662        R_AllocStringBuffer(nbuf, buff);

Print indx, x, and call the Rf_PrintValue function to see the R
representation of x

    (gdb) p indx
    $1 = 4
    (gdb) p x
    $2 = (SEXP) 0x193a440
    (gdb) call Rf_PrintValue(x)
    [1] "A" "B" "C"

Weird! STRING_ELT(x, indx) is trying to access the 5th element (C is
0-based) of a character vector of length 3! One more level up shows
that R is treating the integer codings as though they are correct

    (gdb) up
    #3  0x00007ffff78f633d in do_writetable (call=0xdadba8, op=0x637a58, 
        args=0xd3e190, rho=0xdacd08)
        at /home/mtmorgan/src/R-2-15-branch/src/main/scan.c:1789
    1789                    tmp = EncodeElement2(levels[j], INTEGER(xj)[i] - 1,
    (gdb) call Rf_PrintValue(xj)
    [1] <NA> <NA> <NA>
    Levels: A B C
    
Fix?

Actual fix in R-3.0.1:
                              
    > x <- structure(4:6, .Label = LETTERS[1:3], class="factor")
    > write.table(x, tempfile())
    Error in .External2(C_writetable, x, file, nrow(x), p, rnames, sep, eol,  : 
      index out of range
    
One way to discover fix: break on error, then move up call stack

    > ~/bin/R-devel/bin/R -d gdb -f c-crash.R
    (gdb) break Rf_error
    (gdb) run
    >     x <- structure(4:6, .Label = LETTERS[1:3], class="factor")
    >     write.table(x, tempfile())
    
    Breakpoint 1, Rf_error (format=0x7ffff510787c "index out of range")
        at /home/mtmorgan/src/R-3-0-branch/src/main/errors.c:751
    751    {
    (gdb) up
    #1  0x00007ffff5101d5d in EncodeElement2 (x=0x68ed18, indx=3, quote=TRUE, 
        qmethod=TRUE, buff=<optimized out>, cdec=<optimized out>)
        at /home/mtmorgan/src/R-3-0-branch/src/library/utils/src/io.c:955
    955        error(_("index out of range"));
    (gdb) l
    950        int nbuf;
    951        char *q;
    952        const char *p, *p0;
    953    
    954        if (indx < 0 || indx >= length(x))
    955            error(_("index out of range"));
    956        if(TYPEOF(x) == STRSXP) {
    957            p0 = translateChar(STRING_ELT(x, indx));
    958        if(!quote) return p0;
    959        for(nbuf = 2, p = p0; *p; p++) /* find buffer length needed */
    
Who / when fixed?

    $ cd ~/src/R-devel
    $ svn blame src/library/utils/src/io.c | less
    ...
    62707    murdoch     if (indx < 0 || indx >= length(x))
    62707    murdoch       error(_("index out of range"));
    ...
    $ svn log -r62707
    ------------------------------------------------------------------------
    r62707 | murdoch | 2013-05-02 11:23:36 -0700 (Thu, 02 May 2013) | 1 line
    
    Fix for PR#15300
    ------------------------------------------------------------------------

R bug report:

    https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=15300

# valgind: memory corruption

c-valgrind.c:

    #include <Rinternals.h>
    
    SEXP c_off_by_one(SEXP x)
    {
        for (int i = 1; i < Rf_length(x) + 1; ++i)
            /* oops, C uses 0-based indexing */
            REAL(x)[i] = i * i;
        return x;
    }
    
c-valgrind.R

    dyn.load("c-valgrind.so")
    .Call("c_off_by_one", numeric(100))
    
Compiling and evaluating

    $ R CMD SHLIB c-valgrind.c
    clang -I/home/mtmorgan/bin/R-3-0-branch/include -DNDEBUG  -I/usr/local/include  
        -fpic  -ggdb -O0 -Wall -pedantic -c c-valgrind.c -o c-valgrind.o
    clang -shared -L/usr/local/lib -o c-valgrind.so c-valgrind.o 
        -L/home/mtmorgan/bin/R-3-0-branch/lib -lR
    $ R --vanilla -f c-valgrind.R
    > dyn.load("c-valgrind.so")
    > .Call("c_off_by_one", numeric(100))
      [1]    0    1    4    9   16   25   36   49   64   81  100  121  144  169  196
     [16]  225  256  289  324  361  400  441  484  529  576  625  676  729  784  841
     [31]  900  961 1024 1089 1156 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936
     [46] 2025 2116 2209 2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481
     [61] 3600 3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329 5476
     [76] 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396 7569 7744 7921
     [91] 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801
    >
    
Symptom
- Everything often seems fine 
- Occaisionally observe a segfault, e.g., some platforms or after some
  specific actions

Investigate with valgrind
- slow, so need to have a quickly (seconds) reproducible example

    $ R --vanilla -d valgrind -f c-valgrind.R
    ==1128== Memcheck, a memory error detector
    ==1128== Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.
    ==1128== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
    ==1128== Command: /home/mtmorgan/bin/R-3-0-branch/bin/exec/R --vanilla -f c-valgrind.R
    > dyn.load("c-valgrind.so")
    > .Call("c_off_by_one", numeric(100))
    ==1128== Invalid write of size 8
    ==1128==    at 0x102D666C: c_off_by_one (c-valgrind.c:7)
    ==1128==    by 0x4EC372B: do_dotcall (dotcode.c:579)
    ==1128==    by 0x4EFFF99: Rf_eval (eval.c:635)
    ==1128==    by 0x4F238AC: Rf_ReplIteration (main.c:258)
    ==1128==    by 0x4F23B2F: R_ReplConsole (main.c:307)
    ==1128==    by 0x4F2404F: run_Rmainloop (main.c:986)
    ==1128==    by 0x400845: main (Rmain.c:32)
    ==1128==  Address 0x10149248 is 0 bytes after a block of size 840 alloc'd
    ==1128==    at 0x4C2B6CD: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==1128==    by 0x4F2BF25: Rf_allocVector (memory.c:2452)
    ==1128==    by 0x4E8D9A4: do_makevector (builtin.c:749)
    ==1128==    by 0x4F2D5EA: do_internal (names.c:1170)
    ==1128==    by 0x4EFFD9B: Rf_eval (eval.c:607)
    ==1128==    by 0x4F03EBF: Rf_applyClosure (eval.c:1003)
    ==1128==    by 0x4EFFAF4: Rf_eval (eval.c:654)
    ==1128==    by 0x4F014AA: Rf_evalList (eval.c:1985)
    ==1128==    by 0x4EFFE74: Rf_eval (eval.c:626)
    ==1128==    by 0x4F238AC: Rf_ReplIteration (main.c:258)
    ==1128==    by 0x4F23B2F: R_ReplConsole (main.c:307)
    ==1128==    by 0x4F2404F: run_Rmainloop (main.c:986)
    ==1128== 
      [1]    0    1    4    9   16   25   36   49   64   81  100  121  144  169  196
     [16]  225  256  289  324  361  400  441  484  529  576  625  676  729  784  841
     [31]  900  961 1024 1089 1156 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936
     [46] 2025 2116 2209 2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481
     [61] 3600 3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329 5476
     [76] 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396 7569 7744 7921
     [91] 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801
    > 
    ==1128== 
    ==1128== HEAP SUMMARY:
    ==1128==     in use at exit: 108,407,757 bytes in 20,020 blocks
    ==1128==   total heap usage: 32,189 allocs, 12,169 frees, 117,878,550 bytes allocated
    ==1128== 
    ==1128== LEAK SUMMARY:
    ==1128==    definitely lost: 120 bytes in 2 blocks
    ==1128==    indirectly lost: 480 bytes in 20 blocks
    ==1128==      possibly lost: 0 bytes in 0 blocks
    ==1128==    still reachable: 108,407,157 bytes in 19,998 blocks
    ==1128==         suppressed: 0 bytes in 0 blocks
    ==1128== Rerun with --leak-check=full to see details of leaked memory
    ==1128== 
    ==1128== For counts of detected and suppressed errors, rerun with: -v
    ==1128== Use --track-origins=yes to see where uninitialised values come from
    ==1128== ERROR SUMMARY: 19 errors from 4 contexts (suppressed: 2 from 2)
    
# Profiling with operf

Console 1: Where does findOverlaps spend its time?

    library(IRanges)
    ir <- IRanges(sample(100000), width=100)
    system.time(result <- findOverlaps(ir))
    ##   user  system elapsed 
    ##  4.380   0.236   4.623 
    
Console 2: Find and attach to process

    $ ps -u mtmorgan|grep R
    $ operf --pid 1494
    operf: Profiler started
    operf: Press Ctl-c or 'kill -SIGINT 1532' to stop profiling

Return to console 1, run code, back to console 2, press ctrl-c

    $ opannotate --source ~/R/x86_64-unknown-linux-gnu-library/3.0-2.13/IRanges/libs/IRanges.so | less
    /* 
     * Command line: opannotate --source /home/mtmorgan/R/x86_64-unknown-linux-gnu-library/3.0-2.13/IRanges/libs/IRanges.so 
     * 
     * Interpretation of command line:
     * Output annotated source file with samples
     * Output all files
     * 
     * CPU: Intel Sandy Bridge microarchitecture, speed 2.401e+06 MHz (estimated)
     * Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000
     */

files organized by most-frequently counted; 78% of samples from sort_utils.c

    /* 
     * Total samples for file : "/home/mtmorgan/b/Rpacks/IRanges/src/sort_utils.c"
     * 
     * 132386 78.6177
     */
    
34% of time in compar_aabb

                   :static int compar_aabb(int i1, int i2)
      4428  2.6296 :{ /* compar_aabb total:  57579 34.1934 */
                   :        int ret;
                   :
     24601 14.6094 :        ret = aa[i1] - aa[i2];
     12209  7.2503 :        if (ret != 0)
      2625  1.5589 :                return ret;
      2959  1.7572 :        ret = bb[i1] - bb[i2];
      1455  0.8641 :        return ret;
      9302  5.5240 :}

Times might change substantially when compiled with optimizations
- operf tries to manage this
