%time !git clone git://git.openssl.org/openssl.git from IPython.display import IFrame IFrame("http://en.wikipedia.org/wiki/OpenSSL#History_of_the_OpenSSL_project", 800, 400) cd openssl/ !git log --reverse | head -40 !git log -1 !git log --oneline | wc -l !du -hs -I\.git !sloccount . !git log --format=format:"%ai,%an,%H" > ../commits cd .. import pandas as pd df=pd.read_csv("commits", header=None, names=["time", "author", "id"], index_col="time", parse_dates=True) df.sort(ascending=True, inplace=True) df.head() commits_per_author=df.author.value_counts() commits_per_author import seaborn as sns %matplotlib inline commits_per_author.plot(kind="bar", figsize=(10,6)) df["c"]=1 # counter commits_over_time=df.c.cumsum().plot() commits_over_time authors = commits_per_author.index timelines=pd.DataFrame(index=df.index) for author in authors: timelines[author]=df.c.where(df.author==author) timelines.head() default_palette = sns.color_palette() sns.set_palette("Set1") top_authors=authors[:10] timelines[top_authors].cumsum().plot(style="o",figsize=(20,10)) sns.set_palette(default_palette) per_months=timelines.resample("3M", how="sum") per_months["nauthors"]=per_months.applymap(lambda x: min(x, 1)).sum(axis=1) per_months["nauthors"].plot(kind="bar", figsize=(20,5)) cd openssl/ %%time filecounts = [] for commit in df["id"]: cfiles =! git ls-tree -r --name-only $commit filecounts.append(len(cfiles)) filestats=pd.DataFrame({"filecount": filecounts}, index=df.index) filestats.plot(figsize=(10,6)) file_changes =! git log --all -M -C --name-only --format='format:' | grep -v '^$' dfc = pd.Series(list(file_changes)) dfc.value_counts() c_changes=dfc.where(dfc.str.endswith(".c")).value_counts() c_changes c_changes.plot() h_changes=dfc.where(dfc.str.endswith(".h")).value_counts() h_changes