Rpy2 demonstration

by Dan McGlinn ([email protected]), 11-07-12

Part 1: Import necessary modules

In [1]:
import rpy2.robjects as robj
import rpy2.rinterface as ri
import rpy2.robjects.vectors as rv
from rpy2.robjects.packages import importr

import numpy as np

Part 2: Interact with the R interpreter by creating and calling R objects

In [2]:
# Interact with R using strings
print robj.r('3 + 4')

robj.r('x = 3')
print robj.r('x')
[1] 7

[1] 3

In [3]:
# Interact with R using attributes
print robj.r.x
print "why are the two subsequent calls to ls producing different results?"
print robj.r.ls
print robj.r.ls()
[1] 3

why are the two subsequent calls to ls producing different results?
function (name, pos = -1, envir = as.environment(pos), all.names = FALSE, 
    pattern) 
{
    if (!missing(name)) {
        nameValue <- try(name, silent = TRUE)
        if (identical(class(nameValue), "try-error")) {
            name <- substitute(name)
            if (!is.character(name)) 
                name <- deparse(name)
            warning(sQuote(name), " converted to character string")
            pos <- name
        }
        else pos <- nameValue
    }
    all.names <- .Internal(ls(envir, all.names))
    if (!missing(pattern)) {
        if ((ll <- length(grep("[", pattern, fixed = TRUE))) && 
            ll != length(grep("]", pattern, fixed = TRUE))) {
            if (pattern == "[") {
                pattern <- "\\["
                warning("replaced regular expression pattern '[' by  '\\\\['")
            }
            else if (length(grep("[^\\\\]\\[<-", pattern))) {
                pattern <- sub("\\[<-", "\\\\\\[<-", pattern)
                warning("replaced '[<-' by '\\\\[<-' in regular expression pattern")
            }
        }
        grep(pattern, all.names, value = TRUE)
    }
    else all.names
}
<bytecode: 0x4516e88>
<environment: namespace:base>

[1] "x"

In [4]:
# create a python vector and compute R stats on it
x = rv.IntVector(range(0,11))
print "why are the two subsequent calls to mean producing different results?"
print robj.r('mean(x)')
print robj.r.mean(x)

print "Here are some other stats"
print "Sum"
print robj.r.sum(x)
print "Variance"
print robj.r.var(x)
why are the two subsequent calls to mean producing different results?
[1] 3

[1] 5

Here are some other stats
Sum
[1] 55

Variance
[1] 11

Part 3: Create and interact with multi-dimensional R objects

In [5]:
# create R matrices
v = robj.FloatVector(robj.r.rnorm(20))
m = robj.r.matrix(v, ncol = 2)
print(m)
print "According to R the column sums are"
print robj.r.apply(m, 2, 'sum')

# convert matrix into a numpy array
m_np = np.array(m)
print(m_np)
            [,1]       [,2]
 [1,] -0.6156952  0.1543477
 [2,] -2.5999678 -0.3216966
 [3,]  0.9813665 -0.4068260
 [4,] -1.4399858  0.4291471
 [5,]  0.8828780 -0.2680895
 [6,] -0.7911376  1.2824338
 [7,] -1.1457275  1.0733889
 [8,] -0.3995717  0.1192389
 [9,] -1.2495037  1.5853373
[10,]  1.6610916  0.3145800

According to R the column sums are
[1] -4.716253  3.961862

[[-0.61569516  0.15434772]
 [-2.59996779 -0.32169662]
 [ 0.9813665  -0.40682599]
 [-1.43998583  0.42914708]
 [ 0.882878   -0.26808947]
 [-0.79113756  1.28243381]
 [-1.1457275   1.0733889 ]
 [-0.39957175  0.11923892]
 [-1.24950375  1.58533734]
 [ 1.66109159  0.31458002]]
In [6]:
# read in data as an R data.frame
faithful = robj.DataFrame.from_csvfile('faithful.dat', sep=' ')
print type(faithful)
print faithful.names
<class 'rpy2.robjects.vectors.DataFrame'>
[1] "eruptions" "waiting"  

In [7]:
# subset columns
# extract as a DataFrame
col_one = faithful.rx(1)
print robj.r.head(col_one)
col_one = faithful.rx('eruptions')
print robj.r.head(col_one)
  eruptions
1     3.600
2     1.800
3     3.333
4     2.283
5     4.533
6     2.883

  eruptions
1     3.600
2     1.800
3     3.333
4     2.283
5     4.533
6     2.883

In [8]:
# extract as a Vector
print faithful.rx2(1)[0:11]
print faithful.rx2('eruptions')[0:11]
 [1] 3.600 1.800 3.333 2.283 4.533 2.883 4.700 3.600 1.950 4.350 1.833

 [1] 3.600 1.800 3.333 2.283 4.533 2.883 4.700 3.600 1.950 4.350 1.833

In [9]:
# subset rows
print faithful.rx(robj.IntVector(range(1,6)), True)
  eruptions waiting
1     3.600      79
2     1.800      54
3     3.333      74
4     2.283      62
5     4.533      85

Part 4: Work with R and python functions

In [10]:
# create an R function and call it in the Python interpreter
robj.r('''
       calc_area_r = function(radius, verbose=FALSE)
       {
           if (verbose) {
               cat("I am calling calc_area_r().\n")
           }
           return( 2 * pi * radius )
       }
       calc_area_r(3)
       ''')

print robj.r.calc_area_r(5, verbose='TRUE')

# Make the calc_area_r a callable python function
calc_area_py = robj.r.calc_area_r
print "Here is the result of calc_area_py()"
print calc_area_py(5)
print type(calc_area_py)
I am calling calc_area_r().
[1] 31.41593

Here is the result of calc_area_py()
[1] 31.41593

<class 'rpy2.robjects.functions.SignatureTranslatedFunction'>
In [11]:
# Rpy supports keyword arguments if there is no pythonic conflict
robj.r.rnorm(10, mean=2, sd=3)
Out[11]:
<FloatVector - Python:0x5a48ea8 / R:0x4a2e088>
[0.056675, -2.211890, 1.191051, ..., 0.768670, -1.918076, -2.066839]
In [12]:
# this will draw an error
robj.r.rpois(10, lambda=3) 
  File "<ipython-input-12-8359b2f2c88b>", line 2
    robj.r.rpois(10, lambda=3)
                           ^
SyntaxError: invalid syntax
In [14]:
# creating a keyword argument dictionary solves the problem
robj.r.rpois(10, **{'lambda' : 3})
Out[14]:
<FloatVector - Python:0x5a45488 / R:0x4786a48>
[1.000000, 2.000000, 2.000000, ..., 2.000000, 5.000000, 4.000000]
In [15]:
# Make a python function and call it within R

ri.initr()

stats = importr('stats')

def quad_f(x):
    x = x[0]
    return x ** -2 + 0.5 * x
   
# wrap the function f so it can be exposed to R
quad_fr = ri.rternalize(quad_f)
    
# define the interval to find the minimum over
interval = rv.IntVector((0, 10))
 
# call R's optimize()
res = stats.optimize(quad_fr, interval)
print res
$minimum
[1] 1.587388

$objective
[1] 1.190551


Part 5: Creating and exporting R graphics

In [16]:
# plotting
x = robj.r.runif(10)
y = robj.r.rnorm(10)

robj.r.plot(x, y, xlab="runif rv", ylab="rnorm rv", col="red")
Out[16]:
rpy2.rinterface.NULL
In [17]:
# close that graphic
robj.r('dev.off()')
Out[17]:
<IntVector - Python:0x5a55908 / R:0x5f5ab98>
[       1]
In [18]:
# export the graphic as a pdf
robj.r.pdf('example_r_plot.pdf')
robj.r.plot(x, y, xlab="runif rv", ylab="rnorm rv", col="red")
robj.r('dev.off()')
Out[18]:
<IntVector - Python:0x5a62cf8 / R:0x59056f8>
[       1]

Part 6: Importing R librairies and pythonizing R

In [19]:
# import R libraries
vegan = importr('vegan')

comm_mat = robj.r.matrix(robj.r.rpois(100, 1), 10, 10)
ca = vegan.cca(comm_mat)
print ca

robj.r.plot(ca)
Call: cca(X = structure(c(1, 1, 1, 2, 0, 1, 0, 0, 2, 1, 0, 1, 0, 1, 2,
0, 1, 1, 1, 0, 1, 1, 0, 4, 1, 1, 0, 1, 0, 1, 2, 2, 1, 1, 0, 0, 1, 2, 0,
1, 2, 1, 0, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 0, 0, 0, 3, 2, 2, 2, 2, 0,
1, 1, 1, 2, 3, 0, 1, 1, 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 0, 2,
1, 3, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 2), .Dim = c(10L, 10L)))

              Inertia Rank
Total          0.7991     
Unconstrained  0.7991    9
Inertia is mean squared contingency coefficient 

Eigenvalues for unconstrained axes:
     CA1      CA2      CA3      CA4      CA5      CA6      CA7      CA8 
0.258703 0.161061 0.139109 0.121747 0.050726 0.043811 0.017398 0.005068 
     CA9 
0.001450 


Out[19]:
<ListVector - Python:0x5da4830 / R:0x64c54a0>
[Matrix, Matrix]
  species: <class 'rpy2.robjects.vectors.Matrix'>
  <Matrix - Python:0x5da4680 / R:0x6c15ae0>
[0.214855, -0.793607, 0.211679, ..., -0.656879, 0.178845, 0.737441]
  sites: <class 'rpy2.robjects.vectors.Matrix'>
  <Matrix - Python:0x5da49e0 / R:0x6c17c50>
[0.985231, 0.310632, -0.389409, ..., 1.004003, -0.708114, 0.545716]
In [20]:
# close the graphical device
robj.r('dev.off()')
Out[20]:
<IntVector - Python:0x5da43f8 / R:0x6e43368>
[       1]
In [21]:
# make R pythonic by making it clear which functions are attributed to which packages
base = importr('base')
stats = importr('stats')
graphics = importr('graphics')

m = base.matrix(stats.rnorm(100), ncol = 5)
pca = stats.princomp(m)
graphics.plot(pca, main = "Eigen values")
stats.biplot(pca, main = "biplot")
Out[21]:
rpy2.rinterface.NULL