"""Explore linear and polynomial fits of brain vs. body measurements.

Notebook-style script: loads a whitespace-delimited table, keeps the rows
with ``body < 200``, randomly splits them into two samples, and draws
seaborn regression plots of ``brain`` against ``body`` for several model
orders.
"""

import pandas as pd
import numpy as np
import seaborn as sns

# Allow plots to appear in the notebook. ``%matplotlib inline`` is IPython
# magic and not valid Python syntax, so call it programmatically and skip it
# when the script runs outside IPython (where ``get_ipython`` is undefined).
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Axis limits shared by every plot so the figures are directly comparable.
AXIS_LIMITS = {'xlim': (-10, 200), 'ylim': (-10, 250)}

# Raw string for the regex separator: '\s' in a normal string literal is an
# invalid escape sequence. The first 33 lines of the file are skipped and the
# column names are supplied explicitly.
df = pd.read_table(
    'http://people.sc.fsu.edu/~jburkardt/datasets/regression/x01.txt',
    sep=r'\s+',
    skiprows=33,
    names=['id', 'brain', 'body'],
    index_col='id',
)
df.head()
df.describe()

# Only keep rows in which the body weight is less than 200. Take an explicit
# copy so that adding the 'sample' column below modifies an independent frame
# rather than a filtered selection (avoids SettingWithCopyWarning).
df = df[df.body < 200].copy()
df.shape

# Scatter plot only (no fitted regression line).
# ``sns.plt`` is not available in current seaborn releases, so limits are set
# through the FacetGrid that ``lmplot`` returns; this also applies them to
# every facet.
grid = sns.lmplot(x='body', y='brain', data=df, ci=None, fit_reg=False)
grid.set(**AXIS_LIMITS)

# Scatter plot with a fitted linear regression line.
grid = sns.lmplot(x='body', y='brain', data=df, ci=None)
grid.set(**AXIS_LIMITS)

# Set a random seed for reproducibility.
np.random.seed(12345)

# Randomly assign every row to either sample 1 or sample 2
# (randint's upper bound of 3 is exclusive).
df['sample'] = np.random.randint(1, 3, len(df))
df.head()

# col='sample' subsets the data by sample and creates two separate plots.
grid = sns.lmplot(x='body', y='brain', data=df, ci=None, col='sample')
grid.set(**AXIS_LIMITS)

# hue='sample' subsets the data by sample and creates a single plot.
grid = sns.lmplot(x='body', y='brain', data=df, ci=None, hue='sample')
grid.set(**AXIS_LIMITS)

# Fit an 8th-order polynomial to each sample.
grid = sns.lmplot(x='body', y='brain', data=df, ci=None, col='sample', order=8)
grid.set(**AXIS_LIMITS)

# Fit a 2nd-order polynomial to each sample.
grid = sns.lmplot(x='body', y='brain', data=df, ci=None, col='sample', order=2)
grid.set(**AXIS_LIMITS)