통계적 사고 (2판) 연습문제 (thinkstats2.com, think-stat.xwmooc.org)
Allen Downey / 이광춘(xwMOOC)
%matplotlib inline
from __future__ import print_function
import math
import first
import chap07soln
import thinkstats2
import thinkplot
import numpy as np
NSFG에서 나온 데이터를 사용해서, 출생체중과 산모연령 산점도를 그리시오. 산모연령에 따른 출생체중 백분위수를 도식화하시오. 피어슨 상관과 스피어만 상관을 계산하시오. 두 변수 사이의 관계를 어떻게 특징지어 묘사할 수 있을까?
def ScatterPlot(ages, weights, alpha=1.0):
    """Draw a scatter plot of weight against age and configure its axes.

    Nothing is written to disk here; the points are drawn with
    thinkplot.Scatter and the axes are set up with thinkplot.Config.

    ages: sequence of float, plotted on the x axis
    weights: sequence of float, plotted on the y axis
    alpha: float, forwarded unchanged to thinkplot.Scatter
    """
    # Axis labels and fixed viewing window; the legend is turned off.
    axis_options = dict(
        xlabel='age (years)',
        ylabel='weight (lbs)',
        xlim=[10, 45],
        ylim=[0, 15],
        legend=False,
    )

    thinkplot.Scatter(ages, weights, alpha=alpha)
    thinkplot.Config(**axis_options)
def BinnedPercentiles(df):
    """Bin rows by age and plot weight percentiles for each bin.

    Rows are grouped by the bin that df.agepreg falls into (bin edges
    10, 13, ..., 46).  For each group the mean age and the Cdf of
    df.totalwgt_lb are computed, and the 75th, 50th and 25th
    percentiles of weight are plotted against the mean ages.

    df: DataFrame with agepreg and totalwgt_lb columns
    """
    edges = np.arange(10, 48, 3)
    bin_index = np.digitize(df.agepreg, edges)

    # One pass over the groups, collecting the x value (mean age) and
    # the weight distribution for every bin.
    mean_ages = []
    weight_cdfs = []
    for _, members in df.groupby(bin_index):
        mean_ages.append(members.agepreg.mean())
        weight_cdfs.append(thinkstats2.Cdf(members.totalwgt_lb))

    # Discard the first and last groups (by position, not by bin index).
    # NOTE(review): presumably meant to drop the out-of-range bins; if no
    # ages fall below 10 or at/above 46, this removes in-range bins instead.
    mean_ages = mean_ages[1:-1]
    weight_cdfs = weight_cdfs[1:-1]

    thinkplot.PrePlot(3)
    for percent in (75, 50, 25):
        percentile_weights = [cdf.Percentile(percent) for cdf in weight_cdfs]
        thinkplot.Plot(mean_ages, percentile_weights, label='%dth' % percent)

    thinkplot.Show(xlabel="mother's age (years)",
                   ylabel='birth weight (lbs)')