import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import json
ls /Users/bussonniermatthias/dev/numpy/*.json
/Users/bussonniermatthias/dev/numpy/data-numpy-32ed43dfae87a2d65bc67004df279a7d3465ae34.json /Users/bussonniermatthias/dev/numpy/data-numpy-cdfbdf428d9df9c7119cecae323512a4cd3f57b7.json
# Load the two JSON dumps of timing records. Each file name carries a commit
# hash; the same hashes appear later as `git_rev` values.
with open('/Users/bussonniermatthias/dev/numpy/data-numpy-cdfbdf428d9df9c7119cecae323512a4cd3f57b7.json', 'r', encoding='utf-8') as main:
    # json.load parses straight from the file object.
    dfm = pd.DataFrame(json.load(main))
with open('/Users/bussonniermatthias/dev/numpy/data-numpy-32ed43dfae87a2d65bc67004df279a7d3465ae34.json', 'r', encoding='utf-8') as head:
    dfh = pd.DataFrame(json.load(head))
# Stack both runs into one frame. The original row indices are kept, so the
# combined index contains repeated labels.
df = pd.concat([dfm, dfh])
# Split the combined frame per revision and replace each commit hash with a
# short label. `.assign` returns a new frame, so nothing is written into a
# filtered slice of `df` (a write pandas flags with SettingWithCopyWarning).
f1 = df.loc[df.git_rev == '32ed43dfae87a2d65bc67004df279a7d3465ae34'].assign(git_rev='HEAD')  # HEAD
f2 = df.loc[df.git_rev == 'cdfbdf428d9df9c7119cecae323512a4cd3f57b7'].assign(git_rev='MAIN')
# HEAD rows first, then MAIN rows; rows matching neither hash are dropped.
df = pd.concat([f1, f2])
# Display the labels that remain in the frame.
set(df.git_rev)
# Scale `timing` by 1000 into `timing_ms` and drop the original column.
# NOTE(review): presumably `timing` is in seconds — confirm against the
# script that produced the JSON files.
df['timing_ms'] = df['timing'] * 1000
df = df.drop(columns=['timing'])
# Two stacked panels on an 8x8 inch figure: the top one shows rows with
# eq_nan == True, the bottom one rows with eq_nan == False.
fig, (ax1, ax2) = plt.subplots(2, 1)
fig.set_figwidth(8)
fig.set_figheight(8)
for panel, nan_flag in ((ax1, True), (ax2, False)):
    # Log y-axis on both panels.
    panel.set_yscale('log')
    subset = df[df.eq_nan == nan_flag].drop(columns=['eq_nan'])
    # One violin per `key`, split by revision label.
    sns.violinplot(subset, x='key', y='timing_ms', hue="git_rev", ax=panel)
    panel.set_title(f'eq_nan={nan_flag}')
    # Rotate the x tick labels by 10 degrees.
    panel.tick_params(axis='x', rotation=10)
df
| | key | eq_nan | git_rev | timing_ms |
---|---|---|---|---|
0 | int_array-int_array | True | HEAD | 0.012708 |
1 | int_array-int_array | True | HEAD | 0.005750 |
2 | int_array-int_array | True | HEAD | 0.005458 |
3 | int_array-int_array | True | HEAD | 0.005416 |
4 | int_array-int_array | True | HEAD | 0.005125 |
... | ... | ... | ... | ... |
795 | float_array-float_array_2 | False | MAIN | 2.904834 |
796 | float_array-float_array_2 | False | MAIN | 2.908333 |
797 | float_array-float_array_2 | False | MAIN | 2.899000 |
798 | float_array-float_array_2 | False | MAIN | 2.907750 |
799 | float_array-float_array_2 | False | MAIN | 2.900042 |
1600 rows × 4 columns
# NOTE(review): `sdf` is never assigned in the visible cells, so this call
# raises NameError (see the traceback that follows). Which frame was meant
# to be stacked cannot be determined from here — define `sdf` first or
# remove this cell.
sdf.stack()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[6], line 1
----> 1 sdf.stack()

NameError: name 'sdf' is not defined
# Scratch: an empty DataFrame; `df0` is not used anywhere else in the
# visible cells.
df0 = pd.DataFrame()
# Load seaborn's "titanic" example dataset. The result is not assigned, so
# in a notebook it is only displayed as the cell output.
sns.load_dataset("titanic")