import rpy2.robjects as robjects
# Evaluate the R expression `pi` through rpy2 and keep the returned object;
# indexing it at 0 yields its first element.
pi = robjects.r('pi')
pi[0]

# R source evaluated in the embedded R session: read 'iris.csv' and fit a
# linear model of Sepal_Length on Sepal_Width, stored in R as `m`.
rscript = '''
df = read.csv('iris.csv')
m = lm(Sepal_Length~Sepal_Width, data = df)
'''
# Keep the object handed back by robjects.r() so the fit can be inspected
# from Python.
pymodel = robjects.r(rscript)

# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python 2-only print statement.
print(pymodel.names)

import numpy as np
# Mean of the model's 'residuals' component (single-argument print() works
# on both Python 2 and Python 3).
print(np.mean(pymodel.rx2('residuals')))

# Run summary() on the model `m` that the earlier R script left in the
# R session, then look at it from Python.
res = robjects.r('summary(m)')
print(res)

print(res.names)

print(res.rx2('coefficients'))

print(res.rx2('coefficients').rx(2,4)) # p value of slope

# np.asmatrix is the same function the np.mat alias pointed to; the alias
# itself was removed in NumPy 2.0, so the full name is used here.
print(np.asmatrix(res.rx2('coefficients')))
# Zero-based NumPy index [1,3] addresses the same cell as rx(2,4) above.
np.asmatrix(res.rx2('coefficients'))[1,3]

import numpy as np
import matplotlib.pylab as plt
from scipy import stats
%matplotlib inline
x = np.random.randn(300)
density = stats.kde.gaussian_kde(x)
plt.hist(x, 30, normed=1, alpha=0.5, facecolor='#377EB8')
xd = np.linspace(min(x), max(x), 100)
plt.plot(xd, density(xd), lw=2, alpha=0.2,color='r') #line
plt.fill_between(xd, 0, density(xd), alpha=0.2, color='r')
plt.show()

# Load rpy2's IPython extension; the %R / %%R magics used in the rest of the
# file rely on it.
%load_ext rpy2.ipython

%%R
# Fit Sepal_Length ~ Sepal_Width on the data read from iris.csv and show the
# coefficient table of the model summary.
df = read.csv('iris.csv')
m = lm(Sepal_Length~Sepal_Width, data = df)
res = summary(m)
res$coefficients

%%R -w 500 -h 300 
# Plot a histogram of 1000 normal draws.
x = rnorm(1000)
# Spell out `col` instead of relying on R's partial argument matching of `c`.
hist(x,col='gray')

%%R 
# Create x in the R session.
x = rnorm(100)

%%R
# A separate %%R cell: x from the previous cell is used again here.
summary(x)

%%R -o x,y
# -o x,y: x and y are handed back to Python after the cell runs; the Python
# code that follows reads them.
x = rnorm(100)
y = rnorm(100)

# Convert the values exported from R into NumPy arrays.
x = np.array(x)
y = np.array(y)
# One randomly chosen colour code ('r' or 'b') per point.
z = np.random.choice(['r','b'], size=100, replace=True)
# Draw first, then show. plt.show() is not meant to receive the scatter
# artist, and recent matplotlib only accepts the keyword argument `block`.
plt.scatter(x, y, s=80, c=z, alpha=0.7)
plt.show()

import pandas as pd
# Gather the coordinates and the colour codes as columns x, y and z of one
# frame, then preview its first rows.
df = pd.DataFrame(dict(x=x, y=y, z=z))
df.head()

%%R -i df -w 500 -h 300 
# -i df: the Python DataFrame df is passed into the R session for this cell.
library(ggplot2)
# Scatter plot of y against x, coloured by z.
p = ggplot(df,aes(x = x, y = y, color = z)) + geom_point(size=4)
print(p)

%R -o coefs data(cars); model = lm(dist~speed, data=cars); coefs = model$coef 
coefs = np.array(coefs).round(2)
coefs

import requests
from bs4 import BeautifulSoup
import re
# Fetch the Douban Top 250 landing page and parse its HTML.
url = "http://movie.douban.com/top250"
r = requests.get(url)
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so results can differ between machines.
soup_packtpage = BeautifulSoup(r.text, 'html.parser')

def namefunc(movie):
    """Return the title text of every entry in *movie*.

    For each element, ``findChild`` is asked for a ``span`` whose class is
    ``title`` and that tag's ``.string`` is collected, in input order.
    """
    names = []
    for entry in movie:
        title_tag = entry.findChild('span', attrs={'class': 'title'})
        names.append(title_tag.string)
    return names
def scorefunc(movie):
    """Return each entry's score as a float.

    The score is read from the ``.string`` of the ``em`` tag that
    ``findChild`` returns for the entry, converted via ``str`` then ``float``.
    """
    scores = []
    for entry in movie:
        raw_score = entry.findChild('em').string
        scores.append(float(str(raw_score)))
    return scores
def numfunc(movie):
    """Return, for each entry in *movie*, the integer spelled by the digits
    in a ``span``'s text.

    Every non-digit character of the span's ``.string`` is removed and the
    remaining digits are converted with ``int``. A text without any digit
    therefore raises ``ValueError``.
    """
    # attrs=None is kept exactly as written.
    # NOTE(review): presumably this selects the span that carries no class
    # attribute (as opposed to the class="title" span) -- confirm against bs4.
    labels = [entry.findChild('span', attrs=None).string for entry in movie]
    # Raw string so that \D reaches the regex engine untouched; in a normal
    # string literal it is an invalid escape sequence that newer Python
    # versions warn about.
    return [int(re.sub(r'\D', '', label)) for label in labels]
# Base listing URL (same value as the assignment made before the helper
# functions).
url = "http://movie.douban.com/top250"
def getinfo(url):
    """Download one listing page and extract its per-entry fields.

    Parameters
    ----------
    url : str
        Address fetched with ``requests.get``.

    Returns
    -------
    dict
        ``{'names': [...], 'scores': [...], 'num': [...]}`` -- the results of
        ``namefunc``, ``scorefunc`` and ``numfunc`` applied to every
        ``<div class="info">`` element found on the page.
    """
    r = requests.get(url)
    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed (and warns), so results can differ between
    # machines.
    soup_packtpage = BeautifulSoup(r.text, 'html.parser')
    movie = soup_packtpage.findAll('div', attrs={'class': 'info'})
    return {
        'names': namefunc(movie),
        'scores': scorefunc(movie),
        'num': numfunc(movie),
    }

# Page offsets: 0, 25, ..., 225 (ten pages of 25 entries each).
index = range(0, 250, 25)
# Built with a comprehension and a dedicated loop name, so the data array
# `x` created earlier in the file is no longer overwritten by the loop.
urls = ['http://movie.douban.com/top250?start=' + str(start) + '&filter=&type='
        for start in index]
urls

# Accumulate the fields of every listing page into one dict of lists.
res = {'names': [], 'scores': [], 'num': []}
for url in urls:
    new = getinfo(url)
    # Append this page's values to the running lists, field by field.
    for field in ('names', 'scores', 'num'):
        res[field] += new[field]

import pandas as pd
# Turn the accumulated lists in `res` into a DataFrame and preview its
# first rows.
df = pd.DataFrame(res)
df.head()

%%R -i df -w 500 -h 300 
# -i df: the Python DataFrame df is passed into the R session for this cell.
library(ggplot2)
# Scatter plot of scores against num with a smoothing layer on top.
p = ggplot(df,aes(x = num, y = scores)) + geom_point(size=4,alpha=0.5) + stat_smooth()
print(p)

# Shell escape: convert the notebook r_and_python.ipynb to slides and serve them.
# NOTE(review): newer installs expose this command as `jupyter nbconvert` -- confirm.
!ipython nbconvert r_and_python.ipynb --to slides --post serve