The R code is cribbed from the UCLA course, based on Hosmer & Lemeshow.
First, which version of iPython are you using?
import IPython
print 'ipython version is %s'% IPython.release.version
print 'rmagic is >= 0.13, I believe'
ipython version is 0.14.dev rmagic is >= 0.13, I believe
now load the R module:
%load_ext rmagic
%%R
# you are now dropped down in R;
# which version of R am I using, BTW?
print(R.version)
_ platform x86_64-pc-linux-gnu arch x86_64 os linux-gnu system x86_64, linux-gnu status major 2 minor 15.2 year 2012 month 10 day 26 svn rev 61015 language R version.string R version 2.15.2 (2012-10-26) nickname Trick or Treat
%%R
# my notebook is wider than the default 80 chars, so set it here:
options(width=300)
# get the survival package if you do not have it:
if (! length(which(.packages(all.available=TRUE) %in% "survival"))) {
install.packages("survival",repos="http://cran.cnr.berkeley.edu/")
}
library(survival)
Loading required package: splines
Start using R as you would normally, although echoing has to be performed explicitly.
%%R
hmohiv<-read.table("http://www.ats.ucla.edu/stat/r/examples/asa/hmohiv.csv", sep=",", header = TRUE)
attach(hmohiv)
print(head(hmohiv))
ID time age drug censor entdate enddate 1 1 5 46 0 1 5/15/1990 10/14/1990 2 2 6 35 1 0 9/19/1989 3/20/1990 3 3 8 30 1 1 4/21/1991 12/20/1991 4 4 3 30 1 1 1/3/1991 4/4/1991 5 5 22 36 0 1 9/18/1989 7/19/1991 6 6 1 32 1 0 3/18/1991 4/17/1991
%%R -w 800 -h 600
# figures work fine, but you may want to set the height and width with the -w and -h flags as above
psymbol<-censor+1
table(psymbol)
plot(age, time, pch=(psymbol))
legend(40, 60, c("Censor=1", "Censor=0"), pch=(psymbol))
%%R
# this is all still cribbed from the UCLA page
test <- survreg( Surv(time, censor) ~ age, dist="exponential")
# note again, you have to explicitly echo
print(summary(test))
Call: survreg(formula = Surv(time, censor) ~ age, dist = "exponential") Value Std. Error z p (Intercept) 5.8590 0.5853 10.01 1.37e-23 age -0.0939 0.0158 -5.96 2.59e-09 Scale fixed at 1 Exponential distribution Loglik(model)= -275 Loglik(intercept only)= -292.3 Chisq= 34.5 on 1 degrees of freedom, p= 4.3e-09 Number of Newton-Raphson Iterations: 4 n= 100
You can pass data back and forth, but I am not an expert on this aspect; Access the ipython help for Rpush
and Rpull
for more details.
%Rpull hmohiv
# for a raw pull, the column information is lost?
hmohiv
array([[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100], [ 5, 6, 8, 3, 22, 1, 7, 9, 3, 12, 2, 12, 1, 15, 34, 1, 4, 19, 3, 2, 2, 6, 60, 11, 2, 5, 4, 1, 13, 3, 2, 1, 30, 7, 4, 8, 5, 10, 2, 9, 36, 3, 9, 3, 35, 8, 11, 56, 2, 3, 15, 1, 10, 1, 7, 3, 3, 2, 32, 3, 10, 11, 3, 7, 5, 31, 5, 58, 1, 3, 43, 1, 6, 53, 14, 4, 54, 1, 1, 8, 5, 1, 1, 2, 7, 1, 10, 24, 7, 12, 4, 57, 1, 12, 7, 1, 5, 60, 2, 1], [ 46, 35, 30, 30, 36, 32, 36, 31, 48, 47, 28, 34, 44, 32, 36, 36, 54, 35, 44, 38, 40, 34, 25, 32, 42, 47, 30, 47, 41, 40, 43, 41, 30, 37, 42, 31, 39, 32, 51, 36, 43, 39, 33, 45, 33, 28, 31, 20, 44, 39, 33, 31, 33, 50, 36, 30, 42, 32, 34, 38, 33, 39, 39, 33, 34, 34, 46, 22, 44, 37, 25, 38, 32, 34, 29, 36, 21, 26, 32, 42, 40, 37, 47, 32, 41, 46, 26, 30, 32, 31, 35, 36, 41, 36, 35, 34, 28, 29, 35, 34], [ 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1], [ 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1], [ 57, 94, 45, 27, 93, 34, 6, 10, 29, 78, 43, 53, 12, 31, 51, 41, 85, 66, 68, 28, 83, 63, 8, 65, 88, 19, 64, 54, 38, 73, 92, 70, 48, 58, 33, 32, 86, 77, 20, 47, 23, 49, 76, 22, 18, 71, 43, 61, 7, 13, 6, 3, 36, 75, 72, 5, 40, 37, 89, 90, 50, 44, 26, 91, 25, 39, 44, 69, 3, 30, 56, 87, 17, 81, 14, 82, 59, 35, 4, 60, 67, 84, 21, 95, 9, 74, 2, 1, 24, 96, 42, 15, 16, 11, 80, 79, 62, 46, 55, 52], [ 3, 46, 28, 57, 78, 53, 72, 85, 63, 83, 71, 60, 38, 64, 42, 59, 29, 35, 88, 48, 8, 15, 12, 59, 21, 66, 91, 67, 56, 4, 13, 79, 7, 27, 75, 5, 34, 71, 40, 32, 31, 80, 58, 49, 14, 39, 44, 23, 10, 55, 36, 9, 20, 86, 37, 43, 77, 62, 61, 26, 41, 46, 65, 51, 69, 2, 90, 52, 9, 68, 24, 93, 73, 25, 45, 30, 11, 54, 19, 22, 21, 92, 33, 16, 74, 84, 82, 1, 81, 93, 87, 94, 18, 17, 47, 89, 6, 50, 76, 70]], dtype=int32)
%Rpull -d hmohiv
# now it is pulled as something else, but what I am not sure
hmohiv
array([(1, 5, 46, 0, 1, 57, 3), (2, 6, 35, 1, 0, 94, 46), (3, 8, 30, 1, 1, 45, 28), (4, 3, 30, 1, 1, 27, 57), (5, 22, 36, 0, 1, 93, 78), (6, 1, 32, 1, 0, 34, 53), (7, 7, 36, 1, 1, 6, 72), (8, 9, 31, 1, 1, 10, 85), (9, 3, 48, 0, 1, 29, 63), (10, 12, 47, 0, 1, 78, 83), (11, 2, 28, 1, 0, 43, 71), (12, 12, 34, 0, 1, 53, 60), (13, 1, 44, 1, 1, 12, 38), (14, 15, 32, 1, 1, 31, 64), (15, 34, 36, 0, 1, 51, 42), (16, 1, 36, 0, 1, 41, 59), (17, 4, 54, 0, 1, 85, 29), (18, 19, 35, 0, 0, 66, 35), (19, 3, 44, 1, 0, 68, 88), (20, 2, 38, 0, 1, 28, 48), (21, 2, 40, 0, 0, 83, 8), (22, 6, 34, 1, 1, 63, 15), (23, 60, 25, 0, 0, 8, 12), (24, 11, 32, 0, 1, 65, 59), (25, 2, 42, 1, 0, 88, 21), (26, 5, 47, 0, 1, 19, 66), (27, 4, 30, 0, 0, 64, 91), (28, 1, 47, 1, 1, 54, 67), (29, 13, 41, 0, 1, 38, 56), (30, 3, 40, 1, 1, 73, 4), (31, 2, 43, 0, 1, 92, 13), (32, 1, 41, 0, 1, 70, 79), (33, 30, 30, 0, 1, 48, 7), (34, 7, 37, 0, 1, 58, 27), (35, 4, 42, 1, 1, 33, 75), (36, 8, 31, 1, 1, 32, 5), (37, 5, 39, 1, 1, 86, 34), (38, 10, 32, 0, 1, 77, 71), (39, 2, 51, 0, 1, 20, 40), (40, 9, 36, 0, 1, 47, 32), (41, 36, 43, 0, 1, 23, 31), (42, 3, 39, 0, 1, 49, 80), (43, 9, 33, 0, 1, 76, 58), (44, 3, 45, 1, 1, 22, 49), (45, 35, 33, 0, 1, 18, 14), (46, 8, 28, 0, 1, 71, 39), (47, 11, 31, 0, 1, 43, 44), (48, 56, 20, 1, 0, 61, 23), (49, 2, 44, 0, 0, 7, 10), (50, 3, 39, 1, 1, 13, 55), (51, 15, 33, 0, 1, 6, 36), (52, 1, 31, 0, 1, 3, 9), (53, 10, 33, 0, 1, 36, 20), (54, 1, 50, 1, 1, 75, 86), (55, 7, 36, 1, 1, 72, 37), (56, 3, 30, 1, 1, 5, 43), (57, 3, 42, 1, 1, 40, 77), (58, 2, 32, 1, 1, 37, 62), (59, 32, 34, 0, 1, 89, 61), (60, 3, 38, 1, 1, 90, 26), (61, 10, 33, 0, 0, 50, 41), (62, 11, 39, 1, 1, 44, 46), (63, 3, 39, 1, 1, 26, 65), (64, 7, 33, 1, 1, 91, 51), (65, 5, 34, 1, 1, 25, 69), (66, 31, 34, 0, 1, 39, 2), (67, 5, 46, 1, 1, 44, 90), (68, 58, 22, 0, 1, 69, 52), (69, 1, 44, 1, 1, 3, 9), (70, 3, 37, 0, 0, 30, 68), (71, 43, 25, 0, 1, 56, 24), (72, 1, 38, 0, 1, 87, 93), (73, 6, 32, 0, 1, 17, 73), (74, 53, 34, 0, 1, 81, 25), (75, 14, 29, 0, 1, 14, 45), (76, 4, 36, 1, 1, 82, 30), (77, 54, 21, 0, 1, 59, 11), (78, 1, 26, 1, 1, 35, 54), (79, 1, 32, 1, 1, 4, 19), (80, 8, 42, 0, 1, 60, 22), (81, 5, 40, 1, 1, 67, 21), (82, 1, 37, 1, 1, 84, 92), (83, 1, 47, 0, 1, 21, 33), (84, 2, 32, 1, 1, 95, 16), (85, 7, 41, 1, 0, 9, 74), (86, 1, 46, 1, 0, 74, 84), (87, 10, 26, 1, 1, 2, 82), (88, 24, 30, 0, 0, 1, 1), (89, 7, 32, 1, 1, 24, 81), (90, 12, 31, 1, 0, 96, 93), (91, 4, 35, 0, 1, 42, 87), (92, 57, 36, 0, 1, 15, 94), (93, 1, 41, 1, 1, 16, 18), (94, 12, 36, 1, 0, 11, 17), (95, 7, 35, 1, 1, 80, 47), (96, 1, 34, 1, 1, 79, 89), (97, 5, 28, 0, 1, 62, 6), (98, 60, 29, 0, 0, 46, 50), (99, 2, 35, 1, 0, 55, 76), (100, 1, 34, 1, 1, 52, 70)], dtype=[('ID', '<i4'), ('time', '<i4'), ('age', '<i4'), ('drug', '<i4'), ('censor', '<i4'), ('entdate', '<i4'), ('enddate', '<i4')])
?Rpull