import numpy as np np.random.seed(1) import scipy.stats import random random.seed(1) import matplotlib.pyplot as plt %matplotlib inline import sys import anhima # dev imports # sys.path.insert(0, '../src') # %reload_ext autoreload # %autoreload 1 # %aimport anhima.sim # %aimport anhima.gt # simulate non-uniform variant positions n_variants = 1000 p = 0 pos = [p] for i in range(n_variants-1): gap = int(np.abs(np.cos(i/100))*100) p += gap pos.append(p) pos = np.array(pos) # simulate genotypes n_samples = 100 ploidy = 2 af_dist = scipy.stats.beta(a=.4, b=.6) p_missing = .1 genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, af_dist, p_missing, ploidy) genotypes genotypes.shape n_called = anhima.gt.count_called(genotypes) n_called n_missing = anhima.gt.count_missing(genotypes) n_missing n_hom = anhima.gt.count_hom(genotypes) n_hom n_het = anhima.gt.count_het(genotypes) n_het n_hom_ref = anhima.gt.count_hom_ref(genotypes) n_hom_ref n_hom_alt = anhima.gt.count_hom_alt(genotypes) n_hom_alt n_hom == n_hom_ref + n_hom_alt n_missing + n_hom_ref + n_het + n_hom_alt anhima.gt.as_haplotypes(genotypes) anhima.gt.as_n_alt(genotypes) anhima.gt.as_012(genotypes) packed = anhima.gt.pack_diploid(genotypes) packed.nbytes, genotypes.nbytes packed unpacked = anhima.gt.unpack_diploid(packed) assert np.all(unpacked == genotypes) gn = anhima.gt.as_012(genotypes) # plot missingness counts for sample 0 anhima.gt.plot_windowed_genotype_counts(pos, gn[:, 0], -1, 1000); # plot missingness rate for sample 0 anhima.gt.plot_windowed_genotype_rate(pos, gn[:, 0], -1, 3000); # plot heterozygosity density for sample 0 anhima.gt.plot_windowed_genotype_density(pos, gn[:, 0], 1, 1000); # plot hom alt density for sample 0 anhima.gt.plot_windowed_genotype_density(pos, gn[:, 0], 2, 1000); fig = plt.figure(figsize=(12, 8)) # plot genotypes ax = plt.subplot2grid((8, 1), (0, 0), rowspan=7) anhima.gt.plot_diploid_genotypes(gn, ax=ax) # plot variant locator ax = plt.subplot2grid((8, 1), (7, 0), rowspan=1) anhima.loc.plot_variant_locator(pos, step=10, ax=ax); # simulate continuous call data #dp = np.random.randint(low=0, high=50, size=(n_variants*n_samples)).reshape(n_variants, n_samples) dp = scipy.stats.norm(loc=25, scale=10).rvs(n_variants*n_samples).reshape(n_variants, n_samples) dp[dp < 0] = 0 fig = plt.figure(figsize=(12, 8)) # plot dp ax = plt.subplot2grid((8, 1), (0, 0), rowspan=7) anhima.gt.plot_continuous_calldata(dp, ax=ax, pcolormesh_kwargs=dict(cmap='Greys')) # plot variant locator ax = plt.subplot2grid((8, 1), (7, 0), rowspan=1) anhima.loc.plot_variant_locator(pos, step=10, ax=ax); ax = anhima.gt.plot_genotype_counts_by_sample(gn) ax.legend(loc='upper left', bbox_to_anchor=(1.03, 1)); ax = anhima.gt.plot_genotype_counts_by_sample(gn, orientation='horizontal') ax.legend(loc='upper left', bbox_to_anchor=(1.03, 1)); fig, ax = plt.subplots(figsize=(12, 6)) anhima.gt.plot_genotype_counts_by_variant(gn, ax=ax) ax.legend(loc='upper left', bbox_to_anchor=(1.03, 1)); fig, ax = plt.subplots(figsize=(6, 12)) anhima.gt.plot_genotype_counts_by_variant(gn, ax=ax, orientation='horizontal') ax.legend(loc='upper left', bbox_to_anchor=(1.03, 1)); fig = plt.figure(figsize=(16, 14)) layout = (16, 12) ax = plt.subplot2grid(layout, (0, 0), rowspan=4, colspan=10) anhima.gt.plot_genotype_counts_by_variant(gn, orientation='vertical', ax=ax) ax.set_yticks([]) ax = plt.subplot2grid(layout, (4, 0), rowspan=8, colspan=10) anhima.gt.plot_diploid_genotypes(gn, ax=ax) ax = plt.subplot2grid(layout, (4, 10), rowspan=8, colspan=4) anhima.gt.plot_genotype_counts_by_sample(gn, orientation='horizontal', ax=ax) ax.set_xticks([]) ax = plt.subplot2grid(layout, (12, 0), rowspan=2, colspan=10) anhima.loc.plot_variant_locator(pos, step=10, ax=ax); fig, ax = plt.subplots(figsize=(16, 6)) anhima.gt.plot_continuous_calldata_by_sample(dp, ax=ax);