%matplotlib inline

from __future__ import division

import re    # Expresiones regulares
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import quantities as pq
import scipy.stats as s

# This IPython magic generates a table with version information
#https://github.com/jrjohansson/version_information
%load_ext version_information
%version_information re, numpy, matplotlib, pandas, quantities, scipy

# Definition of new constants:
earth_radius = 6378160.0 * pq.m
moon_radius = 1740000.0 * pq.m
sun_radius = 695000000.0 *pq.m
earth_mass =  5.97219e24 * pq.kg
sun_mass = 332946 * earth_mass
Hubble_constant = 67.80 * (pq.km/pq.s)/(1e6*pq.pc)

# Definition of new units
Mly = pq.UnitQuantity('megalight_year', 1e6*pq.ly, symbol = 'Mly')
Mpc = pq.UnitQuantity('megaparsec', 1e6*pq.pc, symbol = 'Mpc')

# Example: supose that in the light curve of a Ia supernova
# we measure a maximum apparent magnitude of 19.5 with the V filter
m = 19.5
M = -19.3
k = (m-M)/5 - 5
d = 10**k
print "The luminosity distance is %d Mpc" %d

# Get a list with the names of all the downloaded files in the subdirectory
lc_files = !ls ../SDSS_light_curves
lc_files = list(lc_files.n.split('\n'))
lc_files = lc_files[1:]    # Delete the ReadMe file from the list

#A sample of the names...
print lc_files[0:3]

filename = lc_files[0]
df = pd.read_table('../SDSS_light_curves/'+filename, 
     skiprows=18, sep=r'\s+') #Fields separated by spaces
df.columns    # List the columns of the dataframe

# Let's use just the four first columns
df = df.ix[:,0:4]

# Change the name of the first column...
df = df.rename(columns={'#FLAG':'FLAG'})
df.head()

with open('../SDSS_light_curves/'+filename) as f:
    f.readline()
    scndline = f.readline()
resp = re.search(r'[\d.]+', scndline)  # Using a regular expression
Z = resp.group()
print scndline
print type(Z), Z

df['Z'] = float(Z)

print filename

# Extract the supernova id from the filename using a reguolar expression:
resp = re.search(r'\w+', filename)
SN = resp.group()
print SN     # See if it works...

df['SN'] = SN

df.head() # Look at a sample of the data

for filename in lc_files[1:]:
    dftemp = pd.read_table('../SDSS_light_curves/'+filename, 
                           skiprows=18, sep=r'\s+')
    dftemp = dftemp.ix[:,0:4]
    dftemp = dftemp.rename(columns={'#FLAG':'FLAG'})
    with open('../SDSS_light_curves/'+filename) as f:
        f.readline()
        scndline = f.readline()
    resp = re.search(r'[\d.]+', scndline)
    Z = resp.group()
    dftemp['Z'] = float(Z)
    resp = re.search(r'\w+', filename)
    SN = resp.group()
    dftemp['SN'] = SN
    df = pd.concat([df, dftemp], ignore_index=True)

df.info()

df[100:110] # Show some of the rows...

b = df['FLAG'] < 1024
df = df[b]
df.info()

# Create a list with all Supernovae id
list_SN = list(df['SN'].unique())

# We choose one of them
SNid = list_SN[30]

# Select filter 'g' values (filter 1)
b = (df['SN'] == SNid) & (df['FILT']==1)
df_g = df[b].sort(columns = 'MJD')
# And next filter 'r' values (filter 2)
b = (df['SN'] == SNid) & (df['FILT']==2)
df_r = df[b].sort(columns = 'MJD')

f, (ax1, ax2) = plt.subplots(1,2, sharex=True)
f.set_size_inches(12, 5)
f.suptitle(SNid, fontsize=20)
f.subplots_adjust(hspace=0)
ax1.plot(df_g['MJD'], df_g['MAG'],'o-g')
ax1.invert_yaxis()
ax1.set_title('Filter g', fontsize=15)
ax1.set_xlabel('MJD')
ax1.set_ylabel('MAG')
ax2.plot(df_r['MJD'], df_r['MAG'],'o-r')
ax2.invert_yaxis()
ax2.set_title('Filter r', fontsize=15)
ax2.set_xlabel('MJD')
ax2.set_ylabel('MAG');

b = df['FILT'] == 1    # Select "g" filter
dftemp1 = df.ix[b,['MAG', 'Z', 'SN']]
dftemp1.columns = ['g', 'Z', 'SN']
grouped = dftemp1.groupby('SN') # Group by supernova id
df_g = grouped.min() #Select minimum values

b = df['FILT'] == 2    # Select "r" filter
dftemp2 = df.ix[b,['MAG', 'SN']]
dftemp2.columns = ['r', 'SN']
grouped = dftemp2.groupby('SN') # Group by supernova id
df_r = grouped.min()  #Select magnitude minimum values

# Finally we concatenate the columns
# of the last two dataframes
# Pandas will handle aligning the values
# which correspond to a single supernova
df_gr = pd.concat([df_g, df_r], axis=1)
df_gr['g'] = df_gr['g'].astype(np.float)
df_gr['r'] = df_gr['r'].astype(np.float)
df_gr = df_gr[['Z', 'g', 'r']] #Reordering columns
df_gr.head()

df_gr.ix['SN722',:]

M = -19.3
m = 19.394
k = (m-M)/5 - 5
d = 10**k    # this is the distance in Mpc
print "Distance without correction: %.2f Mpc" %d
A = 0.10
m -= A
k = (m-M)/5 - 5
d = 10**k
print "Distance with correction: %.2f Mpc" %d
#       SFD extinctions [ugriz] = [  0.13  0.10  0.07  0.05  0.04 ]
lc_files = !ls ../SDSS_light_curves
lc_files = list(lc_files.n.split('\n')) # Create a list with filenames
lc_files = lc_files[1:]    # Remove the ReadMe file from the list

#Create a list with supernovae names
# Obtained from the list of file names
SN = lambda filename: re.search(r'\w+', filename).group()

#Create two lists with the values of the index and the column names
# of the new dataframe to be created with the extinctions corrections 
# for each SN and filter
index = [SN(filename) for filename in lc_files]
columns = ['Au', 'Ag', 'Ar', 'Ai', 'Az']

#Create an empty dataframe with the right structure
df_ext = pd.DataFrame(index=index, columns=columns)
df_ext.head()

# Finally all files are read
# Extracting the values of the extinctions
# and populating the dataframe df_ext

for filename in lc_files:
    id = SN(filename)  # Extract supernova id
    with open('../SDSS_light_curves/'+filename) as f:
        for linea in f:
            if 'extinctions' in linea: # This is the line containing extinctions
                l_campos = linea.split()
                
                # Write extinctions in the dataframe
                df_ext.ix[id] = [float(a) for a in l_campos[6:11]]
                break

# Let's check that everything seems OK
df_ext.head()

# Let's first do a deep copy of the dataframe
# i.e., copying also the data
df_gr_ext = df_gr.copy()

# apply extinction corrections
df_gr_ext['g'] = df_gr['g'] - df_ext['Ag']
df_gr_ext['r'] = df_gr['r'] - df_ext['Ar']

# Check in a sample we got the right values
df_gr_ext.head()

df_gr_ext['V'] = df_gr_ext['g'] - 0.5784 * (df_gr_ext['g']-df_gr_ext['r']) - 0.0038
df_gr_ext.head()

df_gr_ext['Z'].min(), df_gr_ext['Z'].max()

z = df_gr_ext['Z']

#Pandas series without any attached units. The velocities are in Km/s
v = z * pq.c.rescale(pq.km/pq.s).magnitude

M = -19.3
#m = df_gr_ext['V']
m = df_gr_ext['V'].astype(np.float)
k = (m-M)/5 - 5

# d is a Pandas Series with the distance in Mpc
d = 10**k

slope, intercept, r_value, p_value, std_err = s.linregress(d,v)
print 'The coefficient of determination is r^2 = %.2f' %r_value**2
print 'H0 = %.2f' %slope

f, ax = plt.subplots(1)
f.set_size_inches(12,8)
ax.scatter(d, v, edgecolors='none')
ax.set_xlim(0, 2500)
ax.set_ylim(0,140000)


ax.set_title('Hubble diagram of SDSS-II survey Type Ia supernovae',
             fontsize=20)
ax.set_xlabel('Distances in Mpc')
ax.set_ylabel('$c \, z \; Km \, s^{-1}$')
ax.xaxis.label.set_fontsize(15)
ax.yaxis.label.set_fontsize(20)

ax.plot([0, 3000], [intercept, slope * 3000 + intercept], '-k', lw=2)
ax.text(1000, 80000,'Value obtained $H0 = %.2f \; Km \; s^{-1} \; Mpc^{-1}$'
        %slope, fontsize=15, rotation = 39)
ax.plot([0, 3000], [intercept, 67.80 * 3000 + intercept], ':r', lw=2)
ax.text(500, 130000,'Best current value $H0 = 67.80 \; Km \; s^{-1} \; Mpc^{-1}$',
        fontsize=15, rotation = 41, color='r')
ax.grid();