# Rpy2 demonstration¶

## by Dan McGlinn ([email protected]), 11-07-12¶

### Part 1: Import necessary modules¶

In [1]:
import rpy2.robjects as robj
import rpy2.rinterface as ri
import rpy2.robjects.vectors as rv
from rpy2.robjects.packages import importr

import numpy as np


### Part 2: Interact with the R interpreter by creating and calling R objects¶

In [2]:
# Interact with R using strings
print robj.r('3 + 4')

robj.r('x = 3')
print robj.r('x')

[1] 7

[1] 3


In [3]:
# Interact with R using attributes
print robj.r.x
print "why are the two subsequent calls to ls producing different results?"
print robj.r.ls
print robj.r.ls()

[1] 3

why are the two subsequent calls to ls producing different results?
function (name, pos = -1, envir = as.environment(pos), all.names = FALSE,
pattern)
{
if (!missing(name)) {
nameValue <- try(name, silent = TRUE)
if (identical(class(nameValue), "try-error")) {
name <- substitute(name)
if (!is.character(name))
name <- deparse(name)
warning(sQuote(name), " converted to character string")
pos <- name
}
else pos <- nameValue
}
all.names <- .Internal(ls(envir, all.names))
if (!missing(pattern)) {
if ((ll <- length(grep("[", pattern, fixed = TRUE))) &&
ll != length(grep("]", pattern, fixed = TRUE))) {
if (pattern == "[") {
pattern <- "\$" warning("replaced regular expression pattern '[' by '\\\\['") } else if (length(grep("[^\\\$\\[<-", pattern))) {
pattern <- sub("\\[<-", "\\\\\\[<-", pattern)
warning("replaced '[<-' by '\\\\[<-' in regular expression pattern")
}
}
grep(pattern, all.names, value = TRUE)
}
else all.names
}
<bytecode: 0x4516e88>
<environment: namespace:base>

[1] "x"


In [4]:
# create a python vector and compute R stats on it
x = rv.IntVector(range(0,11))
print "why are the two subsequent calls to mean producing different results?"
print robj.r('mean(x)')
print robj.r.mean(x)

print "Here are some other stats"
print "Sum"
print robj.r.sum(x)
print "Variance"
print robj.r.var(x)

why are the two subsequent calls to mean producing different results?
[1] 3

[1] 5

Here are some other stats
Sum
[1] 55

Variance
[1] 11



### Part 3: Create and interact with multi-dimensional R objects¶

In [5]:
# create R matrices
v = robj.FloatVector(robj.r.rnorm(20))
m = robj.r.matrix(v, ncol = 2)
print(m)
print "According to R the column sums are"
print robj.r.apply(m, 2, 'sum')

# convert matrix into a numpy array
m_np = np.array(m)
print(m_np)

            [,1]       [,2]
[1,] -0.6156952  0.1543477
[2,] -2.5999678 -0.3216966
[3,]  0.9813665 -0.4068260
[4,] -1.4399858  0.4291471
[5,]  0.8828780 -0.2680895
[6,] -0.7911376  1.2824338
[7,] -1.1457275  1.0733889
[8,] -0.3995717  0.1192389
[9,] -1.2495037  1.5853373
[10,]  1.6610916  0.3145800

According to R the column sums are
[1] -4.716253  3.961862

[[-0.61569516  0.15434772]
[-2.59996779 -0.32169662]
[ 0.9813665  -0.40682599]
[-1.43998583  0.42914708]
[ 0.882878   -0.26808947]
[-0.79113756  1.28243381]
[-1.1457275   1.0733889 ]
[-0.39957175  0.11923892]
[-1.24950375  1.58533734]
[ 1.66109159  0.31458002]]

In [6]:
# read in data as an R data.frame
faithful = robj.DataFrame.from_csvfile('faithful.dat', sep=' ')
print type(faithful)
print faithful.names

<class 'rpy2.robjects.vectors.DataFrame'>
[1] "eruptions" "waiting"


In [7]:
# subset columns
# extract as a DataFrame
col_one = faithful.rx(1)
col_one = faithful.rx('eruptions')

  eruptions
1     3.600
2     1.800
3     3.333
4     2.283
5     4.533
6     2.883

eruptions
1     3.600
2     1.800
3     3.333
4     2.283
5     4.533
6     2.883


In [8]:
# extract as a Vector
print faithful.rx2(1)[0:11]
print faithful.rx2('eruptions')[0:11]

 [1] 3.600 1.800 3.333 2.283 4.533 2.883 4.700 3.600 1.950 4.350 1.833

[1] 3.600 1.800 3.333 2.283 4.533 2.883 4.700 3.600 1.950 4.350 1.833


In [9]:
# subset rows
print faithful.rx(robj.IntVector(range(1,6)), True)

  eruptions waiting
1     3.600      79
2     1.800      54
3     3.333      74
4     2.283      62
5     4.533      85



### Part 4: Work with R and python functions¶

In [10]:
# create an R function and call it in the Python interpreter
robj.r('''
{
if (verbose) {
cat("I am calling calc_area_r().\n")
}
return( 2 * pi * radius )
}
calc_area_r(3)
''')

print robj.r.calc_area_r(5, verbose='TRUE')

# Make the calc_area_r a callable python function
calc_area_py = robj.r.calc_area_r
print "Here is the result of calc_area_py()"
print calc_area_py(5)
print type(calc_area_py)

I am calling calc_area_r().
[1] 31.41593

Here is the result of calc_area_py()
[1] 31.41593

<class 'rpy2.robjects.functions.SignatureTranslatedFunction'>

In [11]:
# Rpy supports keyword arguments if there is no pythonic conflict
robj.r.rnorm(10, mean=2, sd=3)

Out[11]:
<FloatVector - Python:0x5a48ea8 / R:0x4a2e088>
[0.056675, -2.211890, 1.191051, ..., 0.768670, -1.918076, -2.066839]
In [12]:
# this will draw an error
robj.r.rpois(10, lambda=3)

  File "<ipython-input-12-8359b2f2c88b>", line 2
robj.r.rpois(10, lambda=3)
^
SyntaxError: invalid syntax

In [14]:
# creating a keyword argument dictionary solves the problem
robj.r.rpois(10, **{'lambda' : 3})

Out[14]:
<FloatVector - Python:0x5a45488 / R:0x4786a48>
[1.000000, 2.000000, 2.000000, ..., 2.000000, 5.000000, 4.000000]
In [15]:
# Make a python function and call it within R

ri.initr()

stats = importr('stats')

x = x[0]
return x ** -2 + 0.5 * x

# wrap the function f so it can be exposed to R

# define the interval to find the minimum over
interval = rv.IntVector((0, 10))

# call R's optimize()
print res

$minimum [1] 1.587388$objective
[1] 1.190551



### Part 5: Creating and exporting R graphics¶

In [16]:
# plotting
x = robj.r.runif(10)
y = robj.r.rnorm(10)

robj.r.plot(x, y, xlab="runif rv", ylab="rnorm rv", col="red")

Out[16]:
rpy2.rinterface.NULL
In [17]:
# close that graphic
robj.r('dev.off()')

Out[17]:
<IntVector - Python:0x5a55908 / R:0x5f5ab98>
[       1]
In [18]:
# export the graphic as a pdf
robj.r.pdf('example_r_plot.pdf')
robj.r.plot(x, y, xlab="runif rv", ylab="rnorm rv", col="red")
robj.r('dev.off()')

Out[18]:
<IntVector - Python:0x5a62cf8 / R:0x59056f8>
[       1]

### Part 6: Importing R librairies and pythonizing R¶

In [19]:
# import R libraries
vegan = importr('vegan')

comm_mat = robj.r.matrix(robj.r.rpois(100, 1), 10, 10)
ca = vegan.cca(comm_mat)
print ca

robj.r.plot(ca)

Call: cca(X = structure(c(1, 1, 1, 2, 0, 1, 0, 0, 2, 1, 0, 1, 0, 1, 2,
0, 1, 1, 1, 0, 1, 1, 0, 4, 1, 1, 0, 1, 0, 1, 2, 2, 1, 1, 0, 0, 1, 2, 0,
1, 2, 1, 0, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 0, 0, 0, 3, 2, 2, 2, 2, 0,
1, 1, 1, 2, 3, 0, 1, 1, 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 0, 2,
1, 3, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 2), .Dim = c(10L, 10L)))

Inertia Rank
Total          0.7991
Unconstrained  0.7991    9
Inertia is mean squared contingency coefficient

Eigenvalues for unconstrained axes:
CA1      CA2      CA3      CA4      CA5      CA6      CA7      CA8
0.258703 0.161061 0.139109 0.121747 0.050726 0.043811 0.017398 0.005068
CA9
0.001450


Out[19]:
<ListVector - Python:0x5da4830 / R:0x64c54a0>
[Matrix, Matrix]
species: <class 'rpy2.robjects.vectors.Matrix'>
<Matrix - Python:0x5da4680 / R:0x6c15ae0>
[0.214855, -0.793607, 0.211679, ..., -0.656879, 0.178845, 0.737441]
sites: <class 'rpy2.robjects.vectors.Matrix'>
<Matrix - Python:0x5da49e0 / R:0x6c17c50>
[0.985231, 0.310632, -0.389409, ..., 1.004003, -0.708114, 0.545716]
In [20]:
# close the graphical device
robj.r('dev.off()')

Out[20]:
<IntVector - Python:0x5da43f8 / R:0x6e43368>
[       1]
In [21]:
# make R pythonic by making it clear which functions are attributed to which packages
base = importr('base')
stats = importr('stats')
graphics = importr('graphics')

m = base.matrix(stats.rnorm(100), ncol = 5)
pca = stats.princomp(m)
graphics.plot(pca, main = "Eigen values")
stats.biplot(pca, main = "biplot")

Out[21]:
rpy2.rinterface.NULL