import rpy2.robjects as robjects pi = robjects.r('pi') pi[0] rscript = ''' df = read.csv('iris.csv') m = lm(Sepal_Length~Sepal_Width, data = df) ''' pymodel = robjects.r(rscript) print pymodel.names import numpy as np print np.mean(pymodel.rx2('residuals')) res = robjects.r('summary(m)') print res print res.names print res.rx2('coefficients') print res.rx2('coefficients').rx(2,4) # p value of slope print np.mat(res.rx2('coefficients')) np.mat(res.rx2('coefficients'))[1,3] import numpy as np import matplotlib.pylab as plt from scipy import stats %matplotlib inline x = np.random.randn(300) density = stats.kde.gaussian_kde(x) plt.hist(x, 30, normed=1, alpha=0.5, facecolor='#377EB8') xd = np.linspace(min(x), max(x), 100) plt.plot(xd, density(xd), lw=2, alpha=0.2,color='r') #line plt.fill_between(xd, 0, density(xd), alpha=0.2, color='r') plt.show() %load_ext rpy2.ipython %%R df = read.csv('iris.csv') m = lm(Sepal_Length~Sepal_Width, data = df) res = summary(m) res$coefficients %%R -w 500 -h 300 # 画图 x = rnorm(1000) hist(x,c='gray') %%R x = rnorm(100) %%R summary(x) %%R -o x,y x = rnorm(100) y = rnorm(100) x = np.array(x) y = np.array(y) z = np.random.choice(['r','b'], size=100, replace=True) plt.show(plt.scatter(x,y,s=80,c=z ,alpha=0.7)) import pandas as pd df = pd.DataFrame({'x':x, 'y':y, 'z':z}) df.head() %%R -i df -w 500 -h 300 library(ggplot2) p = ggplot(df,aes(x = x, y = y, color = z)) + geom_point(size=4) print(p) %R -o coefs data(cars); model = lm(dist~speed, data=cars); coefs = model$coef coefs = np.array(coefs).round(2) coefs import requests from bs4 import BeautifulSoup import re url = "http://movie.douban.com/top250" r = requests.get(url) soup_packtpage = BeautifulSoup(r.text) def namefunc(movie): names = [x.findChild('span',attrs={'class':'title'}).string for x in movie] return names def scorefunc(movie): scores = [float(str(x.findChild('em').string)) for x in movie] return scores def numfunc(movie): num = [x.findChild('span',attrs=None).string for x in movie] num = [int(str(re.sub('\D', '', x))) for x in num] return num url = "http://movie.douban.com/top250" def getinfo(url): r = requests.get(url) soup_packtpage = BeautifulSoup(r.text) movie = soup_packtpage.findAll('div',attrs={'class':'info'}) names = namefunc(movie) scores = scorefunc(movie) num = numfunc(movie) res = {'names': names, 'scores': scores, 'num': num} return res urls = [] index = range(0,250,25) for x in index: urls.append('http://movie.douban.com/top250?start='+str(x)+'&filter=&type=') urls res = {'names': [], 'scores': [], 'num': []} for url in urls: new = getinfo(url) res['names'].extend(new['names']) res['scores'].extend(new['scores']) res['num'].extend(new['num']) import pandas as pd df = pd.DataFrame(res) df.head() %%R -i df -w 500 -h 300 library(ggplot2) p = ggplot(df,aes(x = num, y = scores)) + geom_point(size=4,alpha=0.5) + stat_smooth() print(p) !ipython nbconvert r_and_python.ipynb --to slides --post serve