%matplotlib inline

from __future__ import division

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Generar un cuadro con versiones de las librerías utilizadas en este notebook
#https://github.com/jrjohansson/version_information
%load_ext version_information
%version_information numpy, matplotlib, pandas

# Read the ';'-separated catalogue selection, skipping the first 44 lines and
# the last line of the file (skipfooter requires the python parsing engine).
# The file has no usable header row, so column names are given explicitly and
# the HIP identifier becomes the index.
filename = '../Hipparcos/I_239_selection.tsv'
df = pd.read_table(filename, skiprows=44, sep=';', header=None, index_col=0,
                   names=['HIP', 'Vmag', 'Plx', 'B-V', 'SpType'],
                   skipfooter=1, engine='python')

df.head()

df.tail()

df.describe()

# Cells that contain only whitespace are really missing values: turn them
# into NaN.  A regex replace does this without the Python-2-only
# `basestring` check and without the deprecated element-wise `applymap`.
df_clean = df.replace(r'^\s+$', np.nan, regex=True)

df_clean.describe()

# Drop every row with at least one missing field.  The explicit copy makes
# df_clean an independent frame, so adding or overwriting columns later does
# not trigger pandas' chained-assignment warning.
df_clean = df_clean.dropna().copy()
df_clean.describe()

df_clean.shape

# Convert the measurement columns to floating point.  The builtin `float` is
# used because the `np.float` alias no longer exists in current NumPy.
for col in ('Vmag', 'Plx', 'B-V'):
    df_clean[col] = df_clean[col].astype(float)

# Absolute magnitude: M_V = Vmag + 5 * log10(Plx / 100).
# NOTE(review): the /100 factor implies Plx is in milliarcseconds - confirm
# against the catalogue description.
# Zero or negative parallaxes have no logarithm; mask them to NaN up front so
# M_V is NaN for those rows instead of -inf/NaN plus a RuntimeWarning.
plx = df_clean['Plx'].where(df_clean['Plx'] > 0)
df_clean['M_V'] = df_clean['Vmag'] + 5 * np.log10(plx / 100.)
df_clean.head()

# Keep only the rows whose spectral type starts with a letter followed by a
# digit (e.g. 'G5...'); anything shorter or shaped differently is discarded.
def _starts_alpha_digit(s):
    """Return True when `s` has at least two characters: a letter, then a digit."""
    return len(s) >= 2 and s[0].isalpha() and s[1].isdigit()

is_valid = df_clean['SpType'].apply(_starts_alpha_digit)
# .copy() gives an independent frame; assigning a new column to the result of
# a boolean selection would otherwise raise SettingWithCopyWarning.
df_clean = df_clean[is_valid].copy()

# New column holding the first two characters of 'SpType'.
df_clean['SpType2'] = df_clean['SpType'].str[:2]

df_clean.shape

df_clean.head()

# How many stars fall under each leading spectral-class letter.
clases = df_clean['SpType'].str[0]
clases.value_counts()

# Keep only the classes O, B, A, F, G, K and M.  The explicit copy makes the
# result an independent frame so the column assignment below does not raise
# SettingWithCopyWarning.
df_clean = df_clean[clases.isin(list('OBAFGKM'))].copy()

# Same tally after the filter.
clases = df_clean['SpType'].str[0]
clases.value_counts()

# Encode the class letter as a digit following the order O, B, A, F, G, K, M
# and append the sub-class digit, e.g. 'G5' -> '45'.
# The code is derived from 'SpType' (whose first two characters are what
# 'SpType2' held) rather than from 'SpType2' itself, so running this cell
# again does not fail with KeyError on already-encoded values.
orden = {'O':'0', 'B':'1', 'A':'2', 'F':'3', 'G':'4', 'K':'5', 'M':'6'}
sptype = df_clean['SpType']
df_clean['SpType2'] = sptype.str[0].map(orden) + sptype.str[1]
df_clean.head()

# H-R diagram: absolute magnitude against the numeric spectral-class code.
fig, ax = plt.subplots(figsize=(8,10))

ax.set_xlim(0, 70)
# Limits given high-to-low so the y axis is drawn inverted.
ax.set_ylim(15, -10)
ax.grid()
ax.set_title(u'Diagrama H-R \n (Catálogo Hipparcos)')

ax.title.set_fontsize(20)
ax.set_xlabel('Clase espectral')
ax.xaxis.label.set_fontsize(20)
ax.set_ylabel('Magnitud absoluta')
ax.yaxis.label.set_fontsize(20)

# 'SpType2' holds two-digit strings; convert them with the builtin `int`
# (the `np.int` alias no longer exists in current NumPy).  The very low alpha
# lets dense regions build up darkness where many points overlap.
ax.scatter(df_clean['SpType2'].astype(int), df_clean['M_V'],
           s=50, edgecolors='none', alpha=0.015, c='k')
# One tick in the middle of each block of ten codes, labelled with its class.
ax.set_xticks(range(5,75,10))
ax.set_xticklabels(['O', 'B', 'A', 'F', 'G', 'K', 'M'])
ax.tick_params(axis='both', labelsize=14)


# H-R diagram: absolute magnitude against the B-V colour index.
fig, ax = plt.subplots(figsize=(8, 10))

# Titles and axis labels, all at the same font size.
ax.set_title(u'Diagrama H-R \n (Catálogo Hipparcos)', fontsize=20)
ax.set_xlabel(u'Índice de color B-V', fontsize=20)
ax.set_ylabel('Magnitud absoluta', fontsize=20)

# Axis ranges; the y limits go from high to low so the axis is inverted.
ax.set_xlim(-0.5, 2.5)
ax.set_ylim(15, -10)
ax.grid()

# One small opaque black marker per star.
color_index = df_clean['B-V']
abs_magnitude = df_clean['M_V']
ax.scatter(color_index, abs_magnitude, s=1, edgecolors='none', c='k')

ax.tick_params(axis='both', labelsize=14)


# Count the stars in each luminosity class by looking for its Roman numeral
# inside the 'SpType' string.  Because the numerals are substrings of one
# another ('I' in 'II' in 'III', 'V' in 'VI' in 'VII', 'V' in 'IV'), each
# predicate also excludes the longer numerals that would otherwise match.
# Each entry is (message template, predicate over a spectral-type string).
lum_class_reports = [
    ("Clase VII: enanas blancas, hay %d estrellas",
     lambda s: 'VII' in s),
    ("Clase VI: subenanas, hay %d estrellas",
     lambda s: ('VI' in s) and ('VII' not in s)),
    ("Clase V: secuencia principal, hay %d estrellas",
     lambda s: ('V' in s) and ('VI' not in s) and ('IV' not in s)),
    ("Clase IV: subgigantes, hay %d estrellas",
     lambda s: 'IV' in s),
    ("Clase III: gigantes, hay %d estrellas",
     lambda s: 'III' in s),
    ("Clase II: gigantes brillantes, hay %d estrellas",
     lambda s: ('II' in s) and ('III' not in s) and ('VII' not in s)),
    ("Clase I: supergigantes, hay %d estrellas",
     lambda s: ('I' in s) and ('II' not in s) and ('V' not in s)),
]

for template, f in lum_class_reports:
    b = df_clean['SpType'].map(f)
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(template % b.sum())

# Stars whose spectral type carries no luminosity class at all.
f = lambda s: ('I' not in s) and ('V' not in s)
b = df_clean['SpType'].map(f)
print(b.sum())

def plot_lum_class(b,c, label):
    """Scatter one subset of stars on the current module-level axes `ax`.

    b: boolean Series selecting the rows of `df_clean` to draw
    c: marker colour
    label: legend entry for this subset
    """
    selected = df_clean.loc[b, ['B-V', 'M_V']]
    marker_style = dict(c=c, s=6, edgecolors='none', label=label)
    ax.scatter(selected['B-V'], selected['M_V'], **marker_style)

# H-R diagram (B-V against absolute magnitude) with one colour per
# luminosity class.
fig = plt.figure(figsize=(8,10))
ax = fig.add_subplot(111)
# Grey plot background.  Set on the axes patch because the old `axisbg`
# keyword no longer exists in current Matplotlib.
ax.patch.set_facecolor('0.8')

ax.set_xlim(-0.5, 2.5)
# Limits given high-to-low so the y axis is drawn inverted.
ax.set_ylim(15, -15)
ax.grid()
ax.set_title(u'Diagrama H-R \n (Catálogo Hipparcos)')

ax.title.set_fontsize(20)
ax.set_xlabel(u'Índice de color B-V')
ax.xaxis.label.set_fontsize(20)
ax.set_ylabel('Magnitud absoluta')
ax.yaxis.label.set_fontsize(20)

# (colour, legend label, predicate over a spectral-type string), in drawing
# order.  The Roman numerals are substrings of one another, so each predicate
# also excludes the longer numerals that would otherwise match.
lum_classes = [
    ('white', 'VII: enanas blancas',
     lambda s: 'VII' in s),
    ('blue', 'VI: subenanas',
     lambda s: ('VI' in s) and ('VII' not in s)),
    ('black', 'V: secuencia principal',
     lambda s: ('V' in s) and ('VI' not in s) and ('IV' not in s)),
    ('grey', 'IV: subgigantes',
     lambda s: 'IV' in s),
    ('green', 'III: gigantes',
     lambda s: 'III' in s),
    ('orange', 'II: gigantes brillantes',
     lambda s: ('II' in s) and ('III' not in s) and ('VII' not in s)),
    ('yellow', 'I: supergigantes',
     lambda s: ('I' in s) and ('II' not in s) and ('V' not in s)),
]

for color, label, f in lum_classes:
    b = df_clean['SpType'].map(f)
    plot_lum_class(b, color, label)

ax.tick_params(axis='both', labelsize=14)
# A single, enlarged marker per legend entry so the tiny points stay legible.
legend = ax.legend(scatterpoints=1,markerscale = 6, shadow=True)
frame = legend.get_frame()
frame.set_facecolor('0.90')