#!/usr/bin/env python
# coding: utf-8
# [Sebastian Raschka](http://www.sebastianraschka.com)
#
# [back](https://github.com/rasbt/matplotlib-gallery) to the `matplotlib-gallery` at [https://github.com/rasbt/matplotlib-gallery](https://github.com/rasbt/matplotlib-gallery)
# In[1]:
get_ipython().run_line_magic('load_ext', 'watermark')
# In[2]:
get_ipython().run_line_magic('watermark', '-u -v -d -p matplotlib,numpy')
# [More info](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/ipython_magic/watermark.ipynb) about the `%watermark` extension
# In[3]:
get_ipython().run_line_magic('matplotlib', 'inline')
# # Scatter plots in matplotlib
# # Sections
# - [Basic scatter plot](#Basic-scatter-plot)
# - [Scatter plot with labels](#Scatter-plot-with-labels)
# - [Scatter plot of 2 classes with decision boundary](#Scatter-plot-of-2-classes-with-decision-boundary)
# - [Increasing point size with distance from the origin](#Increasing-point-size-with-distance-from-the-origin)
#
#
# # Basic scatter plot
# [[back to top](#Sections)]
# In[8]:
from matplotlib import pyplot as plt
import numpy as np
# Generating a Gaussion dataset:
# creating random vectors from the multivariate normal distribution
# given mean and covariance
mu_vec1 = np.array([0,0])
cov_mat1 = np.array([[2,0],[0,2]])
x1_samples = np.random.multivariate_normal(mu_vec1, cov_mat1, 100)
x2_samples = np.random.multivariate_normal(mu_vec1+0.2, cov_mat1+0.2, 100)
x3_samples = np.random.multivariate_normal(mu_vec1+0.4, cov_mat1+0.4, 100)
# x1_samples.shape -> (100, 2), 100 rows, 2 columns
plt.figure(figsize=(8,6))
plt.scatter(x1_samples[:,0], x1_samples[:,1], marker='x',
color='blue', alpha=0.7, label='x1 samples')
plt.scatter(x2_samples[:,0], x1_samples[:,1], marker='o',
color='green', alpha=0.7, label='x2 samples')
plt.scatter(x3_samples[:,0], x1_samples[:,1], marker='^',
color='red', alpha=0.7, label='x3 samples')
plt.title('Basic scatter plot')
plt.ylabel('variable X')
plt.xlabel('Variable Y')
plt.legend(loc='upper right')
plt.show()
#
#
# # Scatter plot with labels
# [[back to top](#Sections)]
# In[4]:
import matplotlib.pyplot as plt
x_coords = [0.13, 0.22, 0.39, 0.59, 0.68, 0.74, 0.93]
y_coords = [0.75, 0.34, 0.44, 0.52, 0.80, 0.25, 0.55]
fig = plt.figure(figsize=(8,5))
plt.scatter(x_coords, y_coords, marker='s', s=50)
for x, y in zip(x_coords, y_coords):
plt.annotate(
'(%s, %s)' %(x, y),
xy=(x, y),
xytext=(0, -10),
textcoords='offset points',
ha='center',
va='top')
plt.xlim([0,1])
plt.ylim([0,1])
plt.show()
#
#
# # Scatter plot of 2 classes with decision boundary
# [[back to top](#Sections)]
# In[5]:
# 2-category classification with random 2D-sample data
# from a multivariate normal distribution
import numpy as np
from matplotlib import pyplot as plt
def decision_boundary(x_1):
""" Calculates the x_2 value for plotting the decision boundary."""
return 4 - np.sqrt(-x_1**2 + 4*x_1 + 6 + np.log(16))
# Generating a Gaussion dataset:
# creating random vectors from the multivariate normal distribution
# given mean and covariance
mu_vec1 = np.array([0,0])
cov_mat1 = np.array([[2,0],[0,2]])
x1_samples = np.random.multivariate_normal(mu_vec1, cov_mat1, 100)
mu_vec1 = mu_vec1.reshape(1,2).T # to 1-col vector
mu_vec2 = np.array([1,2])
cov_mat2 = np.array([[1,0],[0,1]])
x2_samples = np.random.multivariate_normal(mu_vec2, cov_mat2, 100)
mu_vec2 = mu_vec2.reshape(1,2).T # to 1-col vector
# Main scatter plot and plot annotation
f, ax = plt.subplots(figsize=(7, 7))
ax.scatter(x1_samples[:,0], x1_samples[:,1], marker='o', color='green', s=40, alpha=0.5)
ax.scatter(x2_samples[:,0], x2_samples[:,1], marker='^', color='blue', s=40, alpha=0.5)
plt.legend(['Class1 (w1)', 'Class2 (w2)'], loc='upper right')
plt.title('Densities of 2 classes with 25 bivariate random patterns each')
plt.ylabel('x2')
plt.xlabel('x1')
ftext = 'p(x|w1) ~ N(mu1=(0,0)^t, cov1=I)\np(x|w2) ~ N(mu2=(1,1)^t, cov2=I)'
plt.figtext(.15,.8, ftext, fontsize=11, ha='left')
# Adding decision boundary to plot
x_1 = np.arange(-5, 5, 0.1)
bound = decision_boundary(x_1)
plt.plot(x_1, bound, 'r--', lw=3)
x_vec = np.linspace(*ax.get_xlim())
x_1 = np.arange(0, 100, 0.05)
plt.show()
#
#
# # Increasing point size with distance from the origin
# [[back to top](#Sections)]
# In[27]:
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8,6))
# Generating a Gaussion dataset:
# creating random vectors from the multivariate normal distribution
# given mean and covariance
mu_vec1 = np.array([0,0])
cov_mat1 = np.array([[1,0],[0,1]])
X = np.random.multivariate_normal(mu_vec1, cov_mat1, 500)
R = X**2
R_sum = R.sum(axis=1)
plt.scatter(X[:, 0], X[:, 1],
color='gray',
marker='o',
s=32. * R_sum,
edgecolor='black',
alpha=0.5)
plt.show()
# In[ ]: