anhima.ped
- Pedigrees¶import numpy as np
np.random.seed(1)
import scipy.stats
import itertools
import random
random.seed(1)
import matplotlib.pyplot as plt
%matplotlib inline
import sys
import anhima
# dev imports
# sys.path.insert(0, '../src')
# %reload_ext autoreload
# %autoreload 1
# %aimport anhima.sim
# %aimport anhima.gt
# %aimport anhima.ped
# simulate parent diplotype
n_variants = 1000
p_alt = .5
parent_diplotype_truth = scipy.stats.bernoulli.rvs(p_alt, size=1000*2).reshape(1000, 2)
# introduce some missingness
p_missing = .03
n_missing = scipy.stats.binom(p=p_missing, n=n_variants).rvs()
loc_missing = random.sample(range(n_variants), n_missing)
parent_diplotype = parent_diplotype_truth.copy()
parent_diplotype[loc_missing] = (-1, -1)
parent_diplotype
array([[ 0, 1], [ 0, 0], [-1, -1], ..., [ 1, 0], [ 0, 1], [ 0, 1]])
anhima.gt.count_called(parent_diplotype)
969
anhima.gt.count_hom_ref(parent_diplotype)
218
anhima.gt.count_het(parent_diplotype)
498
anhima.gt.count_hom_alt(parent_diplotype)
253
# simulate gamete haplotypes
n_gametes = 20
gamete_haplotypes = np.empty((n_variants, n_gametes), dtype='i1')
n_crossovers = scipy.stats.poisson.rvs(.8, size=n_gametes)
p_mendel_error = .03
for i in range(n_gametes):
# randomly choose which parent to start with
parent = scipy.stats.bernoulli(.5).rvs()
h = parent_diplotype_truth[:, parent].copy()
# simulate crossovers
loc_switches = sorted(np.random.randint(0, n_variants, size=n_crossovers[i]))
for l in loc_switches:
parent = 0 if parent == 1 else 1
h[l:] = parent_diplotype_truth[l:, parent]
# simulate errors
n_me = scipy.stats.binom(p=p_mendel_error, n=n_variants).rvs()
loc_me = random.sample(range(n_variants), n_me)
h[loc_me] = scipy.stats.bernoulli.rvs(.5, size=n_me)
# simulate missingness
n_missing = scipy.stats.binom(p=p_missing, n=n_variants).rvs()
loc_missing = random.sample(range(n_variants), n_missing)
h[loc_missing] = -1
gamete_haplotypes[:, i] = h
gamete_haplotypes
array([[0, 0, 0, ..., 1, 1, 1], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [1, 1, 1, ..., 1, 1, 1], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], dtype=int8)
inh = anhima.ped.diploid_inheritance(parent_diplotype, gamete_haplotypes)
inh
array([[1, 1, 1, ..., 2, 2, 2], [3, 3, 3, ..., 3, 3, 3], [6, 6, 6, ..., 6, 6, 6], ..., [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]], dtype=uint8)
inheritance_colors = ['red', # parent 1
'blue', # parent 2
'green', # parents both ref
'orange', # parents both alt
'black', # non-parental
'yellow', # parents missing
'white'] # missing
fig, ax = plt.subplots(figsize=(12, 8))
anhima.gt.plot_discrete_calldata(inh,
colors=inheritance_colors,
states=range(1, 8),
ax=ax);
inh_seg = inh[anhima.gt.is_het(parent_diplotype), :]
inh_seg.shape
(498, 20)
fig, ax = plt.subplots(figsize=(12, 8))
anhima.gt.plot_discrete_calldata(inh_seg,
colors=inheritance_colors,
states=range(8),
ax=ax);