Side-by-side comparisons of query results across multiple database connections. Depends on ipython-sql.

In [ ]:
%load_ext sql
import pandas as pd
from IPython.core.display import HTML
class SQL_Comparitor(object):
    def __init__(self, *connection_strings):
        self.connection_strings = connection_strings
    def run(self, qry):
        dframes = []
        for connection_string in self.connection_strings:
            result = %sql $connection_string $qry
            short_name = connection_string.split('@')[1]            
            keys = [result.keys[0]]
            for key in result.keys[1:]:
                keys.append('%s_%s' % (short_name, key))
            dframes.append(pd.DataFrame(result, columns=keys))
        result = dframes[0]
        for dframe in dframes[1:]:
            result = pd.merge(result, dframe, on=keys[0])
        return result
In [ ]:
comparitor = SQL_Comparitor('mysql+pymysql://username:[email protected]/db1', 
                            'mysql+pymysql://username:[email protected]/db2',
                           )
results = comparitor.run("SHOW VARIABLES LIKE '%cache%' ")
HTML(results.to_html())