%matplotlib inline from __future__ import division import re # Expresiones regulares import numpy as np import matplotlib.pyplot as plt import pandas as pd import quantities as pq import scipy.stats as s # This IPython magic generates a table with version information #https://github.com/jrjohansson/version_information %load_ext version_information %version_information re, numpy, matplotlib, pandas, quantities, scipy # Definition of new constants: earth_radius = 6378160.0 * pq.m moon_radius = 1740000.0 * pq.m sun_radius = 695000000.0 *pq.m earth_mass = 5.97219e24 * pq.kg sun_mass = 332946 * earth_mass Hubble_constant = 67.80 * (pq.km/pq.s)/(1e6*pq.pc) # Definition of new units Mly = pq.UnitQuantity('megalight_year', 1e6*pq.ly, symbol = 'Mly') Mpc = pq.UnitQuantity('megaparsec', 1e6*pq.pc, symbol = 'Mpc') # Example: supose that in the light curve of a Ia supernova # we measure a maximum apparent magnitude of 19.5 with the V filter m = 19.5 M = -19.3 k = (m-M)/5 - 5 d = 10**k print "The luminosity distance is %d Mpc" %d # Get a list with the names of all the downloaded files in the subdirectory lc_files = !ls ../SDSS_light_curves lc_files = list(lc_files.n.split('\n')) lc_files = lc_files[1:] # Delete the ReadMe file from the list #A sample of the names... print lc_files[0:3] filename = lc_files[0] df = pd.read_table('../SDSS_light_curves/'+filename, skiprows=18, sep=r'\s+') #Fields separated by spaces df.columns # List the columns of the dataframe # Let's use just the four first columns df = df.ix[:,0:4] # Change the name of the first column... df = df.rename(columns={'#FLAG':'FLAG'}) df.head() with open('../SDSS_light_curves/'+filename) as f: f.readline() scndline = f.readline() resp = re.search(r'[\d.]+', scndline) # Using a regular expression Z = resp.group() print scndline print type(Z), Z df['Z'] = float(Z) print filename # Extract the supernova id from the filename using a reguolar expression: resp = re.search(r'\w+', filename) SN = resp.group() print SN # See if it works... df['SN'] = SN df.head() # Look at a sample of the data for filename in lc_files[1:]: dftemp = pd.read_table('../SDSS_light_curves/'+filename, skiprows=18, sep=r'\s+') dftemp = dftemp.ix[:,0:4] dftemp = dftemp.rename(columns={'#FLAG':'FLAG'}) with open('../SDSS_light_curves/'+filename) as f: f.readline() scndline = f.readline() resp = re.search(r'[\d.]+', scndline) Z = resp.group() dftemp['Z'] = float(Z) resp = re.search(r'\w+', filename) SN = resp.group() dftemp['SN'] = SN df = pd.concat([df, dftemp], ignore_index=True) df.info() df[100:110] # Show some of the rows... b = df['FLAG'] < 1024 df = df[b] df.info() # Create a list with all Supernovae id list_SN = list(df['SN'].unique()) # We choose one of them SNid = list_SN[30] # Select filter 'g' values (filter 1) b = (df['SN'] == SNid) & (df['FILT']==1) df_g = df[b].sort(columns = 'MJD') # And next filter 'r' values (filter 2) b = (df['SN'] == SNid) & (df['FILT']==2) df_r = df[b].sort(columns = 'MJD') f, (ax1, ax2) = plt.subplots(1,2, sharex=True) f.set_size_inches(12, 5) f.suptitle(SNid, fontsize=20) f.subplots_adjust(hspace=0) ax1.plot(df_g['MJD'], df_g['MAG'],'o-g') ax1.invert_yaxis() ax1.set_title('Filter g', fontsize=15) ax1.set_xlabel('MJD') ax1.set_ylabel('MAG') ax2.plot(df_r['MJD'], df_r['MAG'],'o-r') ax2.invert_yaxis() ax2.set_title('Filter r', fontsize=15) ax2.set_xlabel('MJD') ax2.set_ylabel('MAG'); b = df['FILT'] == 1 # Select "g" filter dftemp1 = df.ix[b,['MAG', 'Z', 'SN']] dftemp1.columns = ['g', 'Z', 'SN'] grouped = dftemp1.groupby('SN') # Group by supernova id df_g = grouped.min() #Select minimum values b = df['FILT'] == 2 # Select "r" filter dftemp2 = df.ix[b,['MAG', 'SN']] dftemp2.columns = ['r', 'SN'] grouped = dftemp2.groupby('SN') # Group by supernova id df_r = grouped.min() #Select magnitude minimum values # Finally we concatenate the columns # of the last two dataframes # Pandas will handle aligning the values # which correspond to a single supernova df_gr = pd.concat([df_g, df_r], axis=1) df_gr['g'] = df_gr['g'].astype(np.float) df_gr['r'] = df_gr['r'].astype(np.float) df_gr = df_gr[['Z', 'g', 'r']] #Reordering columns df_gr.head() df_gr.ix['SN722',:] M = -19.3 m = 19.394 k = (m-M)/5 - 5 d = 10**k # this is the distance in Mpc print "Distance without correction: %.2f Mpc" %d A = 0.10 m -= A k = (m-M)/5 - 5 d = 10**k print "Distance with correction: %.2f Mpc" %d # SFD extinctions [ugriz] = [ 0.13 0.10 0.07 0.05 0.04 ] lc_files = !ls ../SDSS_light_curves lc_files = list(lc_files.n.split('\n')) # Create a list with filenames lc_files = lc_files[1:] # Remove the ReadMe file from the list #Create a list with supernovae names # Obtained from the list of file names SN = lambda filename: re.search(r'\w+', filename).group() #Create two lists with the values of the index and the column names # of the new dataframe to be created with the extinctions corrections # for each SN and filter index = [SN(filename) for filename in lc_files] columns = ['Au', 'Ag', 'Ar', 'Ai', 'Az'] #Create an empty dataframe with the right structure df_ext = pd.DataFrame(index=index, columns=columns) df_ext.head() # Finally all files are read # Extracting the values of the extinctions # and populating the dataframe df_ext for filename in lc_files: id = SN(filename) # Extract supernova id with open('../SDSS_light_curves/'+filename) as f: for linea in f: if 'extinctions' in linea: # This is the line containing extinctions l_campos = linea.split() # Write extinctions in the dataframe df_ext.ix[id] = [float(a) for a in l_campos[6:11]] break # Let's check that everything seems OK df_ext.head() # Let's first do a deep copy of the dataframe # i.e., copying also the data df_gr_ext = df_gr.copy() # apply extinction corrections df_gr_ext['g'] = df_gr['g'] - df_ext['Ag'] df_gr_ext['r'] = df_gr['r'] - df_ext['Ar'] # Check in a sample we got the right values df_gr_ext.head() df_gr_ext['V'] = df_gr_ext['g'] - 0.5784 * (df_gr_ext['g']-df_gr_ext['r']) - 0.0038 df_gr_ext.head() df_gr_ext['Z'].min(), df_gr_ext['Z'].max() z = df_gr_ext['Z'] #Pandas series without any attached units. The velocities are in Km/s v = z * pq.c.rescale(pq.km/pq.s).magnitude M = -19.3 #m = df_gr_ext['V'] m = df_gr_ext['V'].astype(np.float) k = (m-M)/5 - 5 # d is a Pandas Series with the distance in Mpc d = 10**k slope, intercept, r_value, p_value, std_err = s.linregress(d,v) print 'The coefficient of determination is r^2 = %.2f' %r_value**2 print 'H0 = %.2f' %slope f, ax = plt.subplots(1) f.set_size_inches(12,8) ax.scatter(d, v, edgecolors='none') ax.set_xlim(0, 2500) ax.set_ylim(0,140000) ax.set_title('Hubble diagram of SDSS-II survey Type Ia supernovae', fontsize=20) ax.set_xlabel('Distances in Mpc') ax.set_ylabel('$c \, z \; Km \, s^{-1}$') ax.xaxis.label.set_fontsize(15) ax.yaxis.label.set_fontsize(20) ax.plot([0, 3000], [intercept, slope * 3000 + intercept], '-k', lw=2) ax.text(1000, 80000,'Value obtained $H0 = %.2f \; Km \; s^{-1} \; Mpc^{-1}$' %slope, fontsize=15, rotation = 39) ax.plot([0, 3000], [intercept, 67.80 * 3000 + intercept], ':r', lw=2) ax.text(500, 130000,'Best current value $H0 = 67.80 \; Km \; s^{-1} \; Mpc^{-1}$', fontsize=15, rotation = 41, color='r') ax.grid();