import rpy2.robjects as robj
import rpy2.rinterface as ri
import rpy2.robjects.vectors as rv
from rpy2.robjects.packages import importr
import numpy as np
# Interact with R using strings
print robj.r('3 + 4')
robj.r('x = 3')
print robj.r('x')
[1] 7 [1] 3
# Interact with R using attributes
print robj.r.x
print "why are the two subsequent calls to ls producing different results?"
print robj.r.ls
print robj.r.ls()
[1] 3 why are the two subsequent calls to ls producing different results? function (name, pos = -1, envir = as.environment(pos), all.names = FALSE, pattern) { if (!missing(name)) { nameValue <- try(name, silent = TRUE) if (identical(class(nameValue), "try-error")) { name <- substitute(name) if (!is.character(name)) name <- deparse(name) warning(sQuote(name), " converted to character string") pos <- name } else pos <- nameValue } all.names <- .Internal(ls(envir, all.names)) if (!missing(pattern)) { if ((ll <- length(grep("[", pattern, fixed = TRUE))) && ll != length(grep("]", pattern, fixed = TRUE))) { if (pattern == "[") { pattern <- "\\[" warning("replaced regular expression pattern '[' by '\\\\['") } else if (length(grep("[^\\\\]\\[<-", pattern))) { pattern <- sub("\\[<-", "\\\\\\[<-", pattern) warning("replaced '[<-' by '\\\\[<-' in regular expression pattern") } } grep(pattern, all.names, value = TRUE) } else all.names } <bytecode: 0x4516e88> <environment: namespace:base> [1] "x"
# create a python vector and compute R stats on it
x = rv.IntVector(range(0,11))
print "why are the two subsequent calls to mean producing different results?"
print robj.r('mean(x)')
print robj.r.mean(x)
print "Here are some other stats"
print "Sum"
print robj.r.sum(x)
print "Variance"
print robj.r.var(x)
why are the two subsequent calls to mean producing different results? [1] 3 [1] 5 Here are some other stats Sum [1] 55 Variance [1] 11
# create R matrices
v = robj.FloatVector(robj.r.rnorm(20))
m = robj.r.matrix(v, ncol = 2)
print(m)
print "According to R the column sums are"
print robj.r.apply(m, 2, 'sum')
# convert matrix into a numpy array
m_np = np.array(m)
print(m_np)
[,1] [,2] [1,] -0.6156952 0.1543477 [2,] -2.5999678 -0.3216966 [3,] 0.9813665 -0.4068260 [4,] -1.4399858 0.4291471 [5,] 0.8828780 -0.2680895 [6,] -0.7911376 1.2824338 [7,] -1.1457275 1.0733889 [8,] -0.3995717 0.1192389 [9,] -1.2495037 1.5853373 [10,] 1.6610916 0.3145800 According to R the column sums are [1] -4.716253 3.961862 [[-0.61569516 0.15434772] [-2.59996779 -0.32169662] [ 0.9813665 -0.40682599] [-1.43998583 0.42914708] [ 0.882878 -0.26808947] [-0.79113756 1.28243381] [-1.1457275 1.0733889 ] [-0.39957175 0.11923892] [-1.24950375 1.58533734] [ 1.66109159 0.31458002]]
# read in data as an R data.frame
faithful = robj.DataFrame.from_csvfile('faithful.dat', sep=' ')
print type(faithful)
print faithful.names
<class 'rpy2.robjects.vectors.DataFrame'> [1] "eruptions" "waiting"
# subset columns
# extract as a DataFrame
col_one = faithful.rx(1)
print robj.r.head(col_one)
col_one = faithful.rx('eruptions')
print robj.r.head(col_one)
eruptions 1 3.600 2 1.800 3 3.333 4 2.283 5 4.533 6 2.883 eruptions 1 3.600 2 1.800 3 3.333 4 2.283 5 4.533 6 2.883
# extract as a Vector
print faithful.rx2(1)[0:11]
print faithful.rx2('eruptions')[0:11]
[1] 3.600 1.800 3.333 2.283 4.533 2.883 4.700 3.600 1.950 4.350 1.833 [1] 3.600 1.800 3.333 2.283 4.533 2.883 4.700 3.600 1.950 4.350 1.833
# subset rows
print faithful.rx(robj.IntVector(range(1,6)), True)
eruptions waiting 1 3.600 79 2 1.800 54 3 3.333 74 4 2.283 62 5 4.533 85
# create an R function and call it in the Python interpreter
robj.r('''
calc_area_r = function(radius, verbose=FALSE)
{
if (verbose) {
cat("I am calling calc_area_r().\n")
}
return( 2 * pi * radius )
}
calc_area_r(3)
''')
print robj.r.calc_area_r(5, verbose='TRUE')
# Make the calc_area_r a callable python function
calc_area_py = robj.r.calc_area_r
print "Here is the result of calc_area_py()"
print calc_area_py(5)
print type(calc_area_py)
I am calling calc_area_r(). [1] 31.41593 Here is the result of calc_area_py() [1] 31.41593 <class 'rpy2.robjects.functions.SignatureTranslatedFunction'>
# Rpy supports keyword arguments if there is no pythonic conflict
robj.r.rnorm(10, mean=2, sd=3)
<FloatVector - Python:0x5a48ea8 / R:0x4a2e088> [0.056675, -2.211890, 1.191051, ..., 0.768670, -1.918076, -2.066839]
# this will draw an error
robj.r.rpois(10, lambda=3)
File "<ipython-input-12-8359b2f2c88b>", line 2 robj.r.rpois(10, lambda=3) ^ SyntaxError: invalid syntax
# creating a keyword argument dictionary solves the problem
robj.r.rpois(10, **{'lambda' : 3})
<FloatVector - Python:0x5a45488 / R:0x4786a48> [1.000000, 2.000000, 2.000000, ..., 2.000000, 5.000000, 4.000000]
# Make a python function and call it within R
ri.initr()
stats = importr('stats')
def quad_f(x):
x = x[0]
return x ** -2 + 0.5 * x
# wrap the function f so it can be exposed to R
quad_fr = ri.rternalize(quad_f)
# define the interval to find the minimum over
interval = rv.IntVector((0, 10))
# call R's optimize()
res = stats.optimize(quad_fr, interval)
print res
$minimum [1] 1.587388 $objective [1] 1.190551
# plotting
x = robj.r.runif(10)
y = robj.r.rnorm(10)
robj.r.plot(x, y, xlab="runif rv", ylab="rnorm rv", col="red")
rpy2.rinterface.NULL
# close that graphic
robj.r('dev.off()')
<IntVector - Python:0x5a55908 / R:0x5f5ab98> [ 1]
# export the graphic as a pdf
robj.r.pdf('example_r_plot.pdf')
robj.r.plot(x, y, xlab="runif rv", ylab="rnorm rv", col="red")
robj.r('dev.off()')
<IntVector - Python:0x5a62cf8 / R:0x59056f8> [ 1]
# import R libraries
vegan = importr('vegan')
comm_mat = robj.r.matrix(robj.r.rpois(100, 1), 10, 10)
ca = vegan.cca(comm_mat)
print ca
robj.r.plot(ca)
Call: cca(X = structure(c(1, 1, 1, 2, 0, 1, 0, 0, 2, 1, 0, 1, 0, 1, 2, 0, 1, 1, 1, 0, 1, 1, 0, 4, 1, 1, 0, 1, 0, 1, 2, 2, 1, 1, 0, 0, 1, 2, 0, 1, 2, 1, 0, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 0, 0, 0, 3, 2, 2, 2, 2, 0, 1, 1, 1, 2, 3, 0, 1, 1, 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 0, 2, 1, 3, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 2), .Dim = c(10L, 10L))) Inertia Rank Total 0.7991 Unconstrained 0.7991 9 Inertia is mean squared contingency coefficient Eigenvalues for unconstrained axes: CA1 CA2 CA3 CA4 CA5 CA6 CA7 CA8 0.258703 0.161061 0.139109 0.121747 0.050726 0.043811 0.017398 0.005068 CA9 0.001450
<ListVector - Python:0x5da4830 / R:0x64c54a0> [Matrix, Matrix] species: <class 'rpy2.robjects.vectors.Matrix'> <Matrix - Python:0x5da4680 / R:0x6c15ae0> [0.214855, -0.793607, 0.211679, ..., -0.656879, 0.178845, 0.737441] sites: <class 'rpy2.robjects.vectors.Matrix'> <Matrix - Python:0x5da49e0 / R:0x6c17c50> [0.985231, 0.310632, -0.389409, ..., 1.004003, -0.708114, 0.545716]
# close the graphical device
robj.r('dev.off()')
<IntVector - Python:0x5da43f8 / R:0x6e43368> [ 1]
# make R pythonic by making it clear which functions are attributed to which packages
base = importr('base')
stats = importr('stats')
graphics = importr('graphics')
m = base.matrix(stats.rnorm(100), ncol = 5)
pca = stats.princomp(m)
graphics.plot(pca, main = "Eigen values")
stats.biplot(pca, main = "biplot")
rpy2.rinterface.NULL