Side-by-side comparisons of query results across multiple database connections. Depends on ipython-sql.
%load_ext sql
import pandas as pd
from IPython.core.display import HTML
class SQL_Comparitor(object):
def __init__(self, *connection_strings):
self.connection_strings = connection_strings
def run(self, qry):
dframes = []
for connection_string in self.connection_strings:
result = %sql $connection_string $qry
short_name = connection_string.split('@')[1]
keys = [result.keys[0]]
for key in result.keys[1:]:
keys.append('%s_%s' % (short_name, key))
dframes.append(pd.DataFrame(result, columns=keys))
result = dframes[0]
for dframe in dframes[1:]:
result = pd.merge(result, dframe, on=keys[0])
return result
comparitor = SQL_Comparitor('mysql+pymysql://username:password@server1/db1',
'mysql+pymysql://username:password@server2/db2',
)
results = comparitor.run("SHOW VARIABLES LIKE '%cache%' ")
HTML(results.to_html())