"""Interactive exploration session using pyxplorer over an Impala connection.

This reads like an exported notebook: bare expressions (e.g. ``data``,
``tab.columns()``) are kept as-is because they are the cell outputs when the
statements are run interactively; they print nothing when run as a script.
"""
from impala.dbapi import connect

import pyxplorer as pxp
from pyxplorer.loader import Loader

# Host names and port are specific to the original author's cluster.
conn = connect(host='diufpc57', port=21050)

# --- Database-level overview -------------------------------------------------
data = pxp.Database("tpcc3", conn)
data
len(data)
data.num_tables()
# Column count computed by hand, followed by the library's own figure.
sum(len(x.columns()) for x in data.tables())
data.num_columns()
data.num_tuples()

# --- Single-table inspection -------------------------------------------------
tab = data['order_linep']
tab
tab.size()
len(tab.columns())
tab.columns()

# --- Single-column inspection ------------------------------------------------
# The column is reached both by item lookup and by attribute access.
tab['ol_w_id']
tab.ol_w_id
tab.ol_w_id.dcount()
dist = tab.ol_w_id.distribution(limit=10000)

# Notebook magic ``%matplotlib inline`` expressed as plain Python so the file
# parses outside IPython; it is a no-op when no IPython shell is available.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

dist.fraction.hist()

# --- Distinct-count statistics across the database ---------------------------
data.dcounts().fraction.hist()
len(data.dcounts())
data.dcounts().head(10)

# --- Load external data and inspect the resulting table ----------------------
loader = Loader("/user/martin/test1", name_node="diufpc56", hive_server="diufpc301")
loader.load()

# NOTE(review): the original referenced an unbound name ``m`` here; ``pxp`` is
# the only alias under which pyxplorer is imported in this script.
pyxdb = pxp.Database("default", conn)
pyxdb
tab = pyxdb["pyxplorer_data"]
tab
tab.columns()
tab.col_0