#!/usr/bin/env python
# coding: utf-8

#
#
# # Activation impacts on fast cloud responses in a coupled aerosol-climate model ...
#
# ## ... and nifty visualization / analysis tools
# ### or - *Why I don't have cool results to show you (yet)*
# **Daniel Rothenberg (darothen@mit.edu)**
#
# Joint Program Student Luncheon, July 16, 2015
#

#
#
#
#
#
#
# # Python for Scientists
#
# - Mature (well-documented, efficient, easy-to-use) data analysis packages
# - Portable ecosystem - any machine, any operating system
# - Huge userbase
# - Many cool tools in active development
# - Supports many different development styles
#     - scripting (editing files from terminal/shell)
#     - notebook environment
#     - full-blown IDEs for software development - [Spyder](https://github.com/spyder-ide/spyder), [PyCharm](https://www.jetbrains.com/pycharm/), etc.
#
# ## Python helps create reproducible, verifiable science

# # Reproducibility (1) - Version Control

# In[12]:

import subprocess

# In[16]:


def get_git_versioning():
    """Return the short hash of the currently checked-out git commit.

    Runs ``git rev-parse --short HEAD`` in the current working directory.

    Returns
    -------
    str
        The abbreviated commit hash as text, with surrounding whitespace
        (including the trailing newline git emits) stripped.

    Raises
    ------
    subprocess.CalledProcessError
        If ``git`` exits with a non-zero status.
    OSError
        If the ``git`` executable cannot be started.
    """
    # universal_newlines=True makes check_output return text instead of
    # bytes on Python 3, so callers get 'ee8a2e9' rather than b'ee8a2e9'.
    return subprocess.check_output(
        ['git', 'rev-parse', '--short', 'HEAD'],
        universal_newlines=True,
    ).strip()


print("Current HEAD git commit: ", get_git_versioning())

# **Fetch *git* commit at any time, attach it as metadata in whatever figure or output file you create**

# But why should you care about this?
#

# In[35]:

get_ipython().system('git log -2')

# In[19]:

import os

# Remember the starting directory before switching into the repository.
pwd = os.getcwd()
os.chdir("/Users/daniel/workspace/Research/marc_aie")

# **Oh no, I broke something!**

# In[30]:

get_ipython().system('git diff ee8a2e9 ebe1ce2 marc_aie/convert.py')

# ## But, I can't make my *[insert-super-secret-project]* public!
#

# # Reproducibility (2) - Environments

# ## Common Problems
#
# - Something changed in a version of a toolkit or a package and now I get different answers!
#     - It's worse, my code doesn't even run any more!
# - Someone forgot to tell me that I need package *xyz:v.a.b.c* but it won't compile on my machine!
#     - It's worse, it has a conflicting dependency with another package that I **really** need!
# ## Solution - Package Managers
#
# ### [`conda`](http://conda.pydata.org/docs/)
#
# - A python package manager with sophisticated environment management (à la `virtualenv`)
# - Maintain minimal Python installation for a given project
# - Distribute and automatically build your dependencies
# - Automatically comes with [Anaconda Python distribution](http://www.continuum.io/anaconda) and [Miniconda](http://conda.pydata.org/miniconda.html)
# - Social site [binstar](https://binstar.org) for contributing packages
#
# ### [`Docker`](https://www.docker.com/)
#
# - Full-stack software management
# - Rapidly re-deploy your entire working environment to a new machine (local, supercomputer, distributed)

# **environment.yml**
#
# ```yaml
# name: marc_aie
# channels:
# - unidata
# - scitools
# dependencies:
# - cartopy>=0.12
# - ipython>=3.2.0
# - ipython-notebook
# - matplotlib
# - netcdf4
# - numpy
# - python=3.4
# - seaborn
# - xlrd
# - xray>=0.5
# ```

# **Create environment based on `environment.yml`**
#
# ```bash
# cd [my_repo_dir]
# conda env create
# ```

# **Activate environment**
#
# ```bash
# source activate marc_aie
# ```

# In[45]:

get_ipython().system('conda info -e')

# In[46]:

get_ipython().system('conda list')

# In[48]:

get_ipython().system('binstar search -t conda cartopy')

#
# # Visualizations
#
#

# **[Cartopy](http://scitools.org.uk/cartopy/docs/latest/) is a powerful wrapper for matplotlib enabling cartographic/geographic transformations of your data**

# In[74]:

# `ccrs` is needed for the map projection in the plotting cell below; it was
# used there without ever being imported.
import cartopy.crs as ccrs
import marc_aie as ma
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np

v = ma.CESMVar("TS")
v.load_datasets()
# Replace the loaded data by the time-mean of its 'TS' field
# (presumably applied per dataset - confirm against marc_aie.CESMVar.apply).
v.apply(lambda ds: ds['TS'].mean("time"))
data = v.data['arg_comp', 'F2000']
print(data)

# In[83]:

fig = plt.figure(figsize=(12, 5))
ax = fig.add_subplot(111, projection=ccrs.PlateCarree())
# 23 evenly spaced contour levels, i.e. every 5 units from 210 to 320.
gp = ma.global_plot(data, ax=ax, cmap="cubehelix_r",
                    levels=np.linspace(210, 320, 23))
cb = plt.colorbar(orientation='horizontal')
# Bind the return value so the notebook does not echo the Text object.
_ = plt.title(v.long_name + " (%s)" % v.units)

# ## Under the hood
#
#
#
# ([link to xray docs](http://xray.readthedocs.org/en/stable/))

# ## `xray`
#
# - Powerful toolkit for accessing, manipulating, and wrangling NetCDF / HDF5 data
# - Maintained by employees from [**THE CLIMATE CORPORATION**](https://www.climate.com/)
# - Under active development
# - Cool features:
#     - lazy evaluation system; doesn't do any numerical work until it *absolutely* needs to
#     - implements [`dask`](http://dask.pydata.org/en/latest/) out-of-core computation library
#     - extends pythonic interfaces for arrays and maps
#     - serialize to/from NetCDF

# ### Simple xray example

# In[90]:

v = ma.CESMVar("TS")
v.load_datasets()
data = ma.create_master(v)

# In[91]:

print(data)

# In[106]:


def pd_minus_pi(ds):
    """Return ``ds`` at ``aer='F2000'`` minus ``ds`` at ``aer='F1850'``.

    Going by the name, this is presumably the present-day minus
    pre-industrial difference - confirm against the experiment setup.
    """
    return ds.sel(aer='F2000') - ds.sel(aer='F1850')


# Difference field -> mean over 'lon' -> grouped by season -> mean over 'time'.
a = (data['TS']
     .pipe(pd_minus_pi)
     .mean('lon')
     .groupby('time.season')
     .mean('time'))
print(a)

# In[120]:

import seaborn as sns
sns.set(style='ticks')

# One line per (season, act) combination, drawn with the 'Paired' palette.
with sns.color_palette('Paired'):
    for seas in a.season:
        for act in a.act:
            d = a.sel(season=seas, act=act)
            plt.plot(d.lat, d, label="%s - %s" % (act.values, seas.values))

sns.despine(offset=10)
plt.legend(loc='best')