#!/usr/bin/env python
# coding: utf-8

# # gathergrams - display gather results
#
# Notebook export: loads one sample's gather CSV, shortens the genome names,
# and saves a scatter plot of per-genome overlap against gather rank.

# In[1]:

# `get_ipython` is only defined inside an IPython/Jupyter session. When this
# file is run with plain `python` (as the shebang requests) the name does not
# exist, so skip the inline-backend magic instead of crashing with NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import pylab
import numpy
import pandas as pd


# In[2]:

sample_id = 'SRR606249'
df = pd.read_csv(f'outputs/{sample_id}.gather.csv')


def fix_name(x: str) -> str:
    """Return a shortened form of the name *x*.

    Keeps the first two underscore-separated fields of *x*, rejoins them with
    an underscore, and then drops everything from the first '.' onward.
    """
    return "_".join(x.split('_')[:2]).split('.')[0]


df['name'] = df['name'].apply(fix_name)

# Notebook preview of the first four rows; the value is discarded when this
# file runs as a plain script.
df[:4]


# # Figure 1: gathergram

# In[3]:

pylab.figure(num=None, figsize=(8, 6))

# NOTE(review): both series plot `*_bp` columns divided by 1e6, while the
# legend and y-axis text say "hashes" -- confirm the intended unit.
pylab.plot(df.index, df.intersect_bp / 1e6, 'x',
           label='all hashes classified to this genome')
pylab.plot(df.index, df.unique_intersect_bp / 1e6, 'o',
           label='hashes specific to this genome')
# Disabled third series (remaining_bp), kept for reference:
#pylab.plot(df.index, df.remaining_bp / 1e6, '-', label='unclassified bp remaining')

pylab.xlabel('genome gather rank')
pylab.ylabel('num hashes (millions)')
pylab.legend(loc='upper right')
# NOTE(review): the title is hard-coded and does not use `sample_id` --
# confirm it describes the sample loaded above.
pylab.title('hu-s1 gather x genbank')

pylab.savefig(f'/tmp/gathergram-{sample_id}.pdf')


# In[ ]: