import ipyrad as ip
import ipyparallel as ipp
ipyclient = ipp.Client()
/home/deren/miniconda3/lib/python3.6/site-packages/ipyparallel/client/client.py:458: RuntimeWarning: Controller appears to be listening on localhost, but not on this machine. If this is true, you should specify Client(...,sshserver='you@oud') or instruct your controller to listen on an external IP. RuntimeWarning)
data = ip.Assembly("1-pairtest")
data.set_params("raw_fastq_path", "ipsimdata/pairddrad_example_R*_.fastq.gz")
data.set_params("barcodes_path", "ipsimdata/pairddrad_example_barcodes.txt")
data.set_params("datatype", "pairddrad")
data.set_params("restriction_overhang", ("TGCAG", "CGG"))
New Assembly: 1-pairtest
data = ip.Assembly("2-setest")
data.set_params("raw_fastq_path", "ipsimdata/rad_example_R1_.fastq.gz")
data.set_params("barcodes_path", "ipsimdata/rad_example_barcodes.txt")
data.set_params("datatype", "rad")
data.set_params("restriction_overhang", ("TGCAG", ""))
New Assembly: 2-setest
data = ip.Assembly("3-refsetest")
data.set_params("raw_fastq_path", "ipsimdata/rad_example_R1_.fastq.gz")
data.set_params("barcodes_path", "ipsimdata/rad_example_barcodes.txt")
data.set_params("datatype", "rad")
data.set_params("assembly_method", "reference")
data.set_params("reference_sequence", "ipsimdata/rad_example_genome.fa")
data.set_params("restriction_overhang", ("TGCAG", ""))
#data.get_params()
New Assembly: 3-refsetest
data = ip.Assembly("4-refpairtest")
data.set_params("raw_fastq_path", "ipsimdata/pairddrad_example_R*_.fastq.gz")
data.set_params("barcodes_path", "ipsimdata/rad_example_barcodes.txt")
data.set_params("datatype", "pairddrad")
data.set_params("assembly_method", "reference")
data.set_params("reference_sequence", "ipsimdata/pairddrad_example_genome.fa")
data.set_params("restriction_overhang", ("TGCAG", "CGG"))
#data.get_params()
New Assembly: 4-refpairtest
data = ip.Assembly("5-tortas")
data.set_params("project_dir", "tortas")
data.set_params("sorted_fastq_path", "/home/deren/Dropbox/Maud/fastq-concats/*.gz")
data.set_params("datatype", "pairddrad")
data.set_params("assembly_method", "reference")
data.set_params("reference_sequence", "/home/deren/Dropbox/Maud/lgeorge.genome.fa")
data.set_params("restriction_overhang", ("CATG", "AATT"))
data.set_params("filter_adapters", 2)
New Assembly: 5-tortas
data.run("12", force=True)
Assembly: 4-refpairtest [force] overwriting fastq files previously created by ipyrad. This _does not_ affect your original/raw data files. [####################] 100% 0:00:04 | sorting reads | s1 | [####################] 100% 0:00:02 | writing/compressing | s1 | [####################] 100% 0:00:04 | processing reads | s2 |
#data.run("12", force=True)
#data = ip.load_json("1-pairtest.json")
#data = ip.load_json("2-setest.json")
#data = ip.load_json("3-refsetest.json")
#data = ip.load_json("4-refpairtest.json")
data = ip.load_json("tortas/5-tortas.json")
loading Assembly: 5-tortas from saved path: ~/Documents/ipyrad/tests/tortas/5-tortas.json
state | reads_raw | reads_passed_filter | |
---|---|---|---|
AGO02concat | 2 | 11050294 | 10800672 |
AGO08concat | 2 | 13408401 | 13030329 |
AGO09concat | 2 | 15650127 | 15121047 |
AGO11concat | 2 | 12848936 | 12370018 |
from ipyrad.assemble.cluster_across import *
from ipyrad.assemble.clustmap import *
s3 = Step3(data, list(data.samples.values()), 0, 5, True, ipyclient)
samples = list(s3.data.samples.values())
sample = samples[1]
s3.data.ipcluster["threads"] = 4
s3.run()
[####################] 100% 0:00:01 | concatenating | s3 | [####################] 100% 0:00:01 | join unmerged pairs | s3 | [####################] 100% 0:00:00 | dereplicating | s3 | [####################] 100% 0:00:00 | splitting dereps | s3 | [####################] 100% 0:00:02 | mapping reads | s3 | [####################] 100% 0:00:10 | building clusters | s3 |
s3.data.run("45")
Assembly: 4-refpairtest [####################] 100% 0:00:04 | inferring [H, E] | s4 | [####################] 100% 0:00:00 | calculating depths | s5 | [####################] 100% 0:00:00 | chunking clusters | s5 | [####################] 100% 0:00:25 | consens calling | s5 |
#s3.data.stats
data = s3.data
jobid = 0
samples = list(data.samples.values())[:4]
randomseed = 123
conshandles = [
sample.files.consens[0] for sample in samples if
sample.stats.reads_consens]
conshandles.sort()
assert conshandles, "no consensus files found"
conshandles
['/home/deren/Documents/ipyrad/tests/4-refpairtest_consens/1B_0.consens.gz', '/home/deren/Documents/ipyrad/tests/4-refpairtest_consens/1D_0.consens.gz', '/home/deren/Documents/ipyrad/tests/4-refpairtest_consens/2H_0.consens.gz', '/home/deren/Documents/ipyrad/tests/4-refpairtest_consens/3J_0.consens.gz']
## concatenate all of the gzipped consens files
cmd = ['cat'] + conshandles
groupcons = os.path.join(
data.dirs.across,
"{}-{}-catcons.gz".format(data.name, jobid))
LOGGER.debug(" ".join(cmd))
with open(groupcons, 'w') as output:
call = sps.Popen(cmd, stdout=output, close_fds=True)
call.communicate()
## a string of sed substitutions for temporarily replacing hetero sites
## skips lines with '>', so it doesn't affect taxon names
subs = ["/>/!s/W/A/g", "/>/!s/w/A/g", "/>/!s/R/A/g", "/>/!s/r/A/g",
"/>/!s/M/A/g", "/>/!s/m/A/g", "/>/!s/K/T/g", "/>/!s/k/T/g",
"/>/!s/S/C/g", "/>/!s/s/C/g", "/>/!s/Y/C/g", "/>/!s/y/C/g"]
subs = ";".join(subs)
## pipe passed data from gunzip to sed.
cmd1 = ["gunzip", "-c", groupcons]
cmd2 = ["sed", subs]
LOGGER.debug(" ".join(cmd1))
LOGGER.debug(" ".join(cmd2))
proc1 = sps.Popen(cmd1, stdout=sps.PIPE, close_fds=True)
allhaps = groupcons.replace("-catcons.gz", "-cathaps.gz")
with open(allhaps, 'w') as output:
proc2 = sps.Popen(cmd2, stdin=proc1.stdout, stdout=output, close_fds=True)
proc2.communicate()
proc1.stdout.close()
data.dirs.across = os.path.join(data.name + "_across")
if not os.path.exists(data.dirs.across):
os.makedirs(data.dirs.across)
import ipyrad
conshandles = [
sample.files.consens[0] for sample in samples if
sample.stats.reads_consens]
conshandles.sort()
assert conshandles, "no consensus files found"
## concatenate all of the gzipped consens files
cmd = ['cat'] + conshandles
groupcons = os.path.join(
data.dirs.across,
"{}-{}-catcons.gz".format(data.name, jobid))
LOGGER.debug(" ".join(cmd))
with open(groupcons, 'w') as output:
call = sps.Popen(cmd, stdout=output, close_fds=True)
call.communicate()
## a string of sed substitutions for temporarily replacing hetero sites
## skips lines with '>', so it doesn't affect taxon names
subs = ["/>/!s/W/A/g", "/>/!s/w/A/g", "/>/!s/R/A/g", "/>/!s/r/A/g",
"/>/!s/M/A/g", "/>/!s/m/A/g", "/>/!s/K/T/g", "/>/!s/k/T/g",
"/>/!s/S/C/g", "/>/!s/s/C/g", "/>/!s/Y/C/g", "/>/!s/y/C/g"]
subs = ";".join(subs)
## impute pseudo-haplo information to avoid mismatch at hetero sites
## the read data with hetero sites is put back into clustered data later.
## pipe passed data from gunzip to sed.
cmd1 = ["gunzip", "-c", groupcons]
cmd2 = ["sed", subs]
LOGGER.debug(" ".join(cmd1))
LOGGER.debug(" ".join(cmd2))
proc1 = sps.Popen(cmd1, stdout=sps.PIPE, close_fds=True)
allhaps = groupcons.replace("-catcons.gz", "-cathaps.gz")
with open(allhaps, 'w') as output:
proc2 = sps.Popen(cmd2, stdin=proc1.stdout, stdout=output, close_fds=True)
proc2.communicate()
proc1.stdout.close()
## now sort the file using vsearch
allsort = groupcons.replace("-catcons.gz", "-catsort.fa")
cmd1 = [ipyrad.bins.vsearch,
"--sortbylength", allhaps,
"--fasta_width", "0",
"--output", allsort]
LOGGER.debug(" ".join(cmd1))
proc1 = sps.Popen(cmd1, close_fds=True)
proc1.communicate()
(None, None)
from ipyrad.assemble.cluster_across import *
random.seed(randomseed)
## open an iterator to lengthsorted file and grab two lines at at time
allshuf = groupcons.replace("-catcons.gz", "-catshuf.fa")
outdat = open(allshuf, 'wt')
indat = open(allsort, 'r')
idat = izip(iter(indat), iter(indat))
done = 0
chunk = [next(idat)]
while not done:
## grab 2-lines until they become shorter (unless there's only one)
oldlen = len(chunk[-1][-1])
while 1:
try:
dat = next(idat)
except StopIteration:
done = 1
break
if len(dat[-1]) == oldlen:
chunk.append(dat)
else:
## send the last chunk off to be processed
random.shuffle(chunk)
outdat.write("".join(chain(*chunk)))
## start new chunk
chunk = [dat]
break
## do the last chunk
random.shuffle(chunk)
outdat.write("".join(chain(*chunk)))
indat.close()
outdat.close()
def build_clusters_from_cigars(data, sample):
# get all regions with reads. Generator to yield (str, int, int)
fullregions = bedtools_merge(data, sample).strip().split("\n")
regions = (i.split("\t") for i in fullregions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
# access reads from bam file using pysam
bamfile = AlignmentFile(
os.path.join(data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name)),
'rb')
# iterate over all regions
opath = os.path.join(
data.dirs.clusts, "{}.clustS.gz".format(sample.name))
out = gzip.open(opath, 'wt')
idx = 0
clusters = []
for reg in regions:
# uncomment and compare against ref sequence when testing
#ref = get_ref_region(data.paramsdict["reference_sequence"], *reg)
reads = bamfile.fetch(*reg)
# store reads in a dict
rdict = {}
# paired-end data cluster building
if "pair" in data.paramsdict["datatype"]:
# match paired reads together in a dictionary
for read in reads:
if read.qname not in rdict:
rdict[read.qname] = [read, None]
else:
rdict[read.qname][1] = read
# sort keys by derep number
keys = sorted(
rdict.keys(),
key=lambda x: int(x.split("=")[-1]), reverse=True)
# build the cluster based on map positions, orientation, cigar
clust = []
for key in keys:
r1, r2 = rdict[key]
if r1 and r2:
#lref = len(ref[1])
lref = reg[2] - reg[1]
arr1 = np.zeros(lref, dtype="U1")
arr2 = np.zeros(lref, dtype="U1")
arr1.fill("-")
arr2.fill("-")
# how far ahead of the start does this read begin
seq = cigared(r1.seq, r1.cigar)
start = r1.reference_start - reg[1]
arr1[start:start + len(seq)] = list(seq)
seq = cigared(r2.seq, r2.cigar)
start = r2.reference_start - reg[1]
arr2[start:start + len(seq)] = list(seq)
arr3 = join_arrays(arr1, arr2)
pairseq = "".join(arr3)
ori = "+"
if r1.is_reverse:
ori = "-"
derep = r1.qname.split("=")[-1]
rname = "{}:{}-{};size={};{}".format(*reg, derep, ori)
clust.append("{}\n{}".format(rname, pairseq))
# single-end data cluster building
else:
for read in reads:
rdict[read.qname] = read
# sort keys by derep number
keys = sorted(
rdict.keys(),
key=lambda x: int(x.split("=")[-1]), reverse=True)
# build the cluster based on map positions, orientation, cigar
clust = []
for key in keys:
r1 = rdict[key]
#aref = np.array(list(ref[1]))
lref = reg[2] - reg[1]
arr1 = np.zeros(lref, dtype="U1")
arr1.fill("-")
# how far ahead of the start does this read begin
seq = cigared(r1.seq, r1.cigar)
start = r1.reference_start - reg[1]
arr1[start:start + len(seq)] = list(seq)
aseq = "".join(arr1)
ori = "+"
if r1.is_reverse:
ori = "-"
derep = r1.qname.split("=")[-1]
rname = "{}:{}-{};size={};{}".format(*reg, derep, ori)
clust.append("{}\n{}".format(rname, aseq))
# store this cluster
clusters.append("\n".join(clust))
idx += 1
# if 1000 clusters stored then write to disk
if not idx % 10:
out.write("\n//\n//\n".join(clusters) + "\n//\n//\n")
clusters = []
# write final remaining clusters to disk
out.write("\n//\n//\n".join(clusters) + "\n//\n//\n")
out.close()
build_clusters_from_cigars(data, sample)
#maxlens, depths = get_quick_depths(data, sample)
#sample_cleanup(data, sample)
def get_quick_depths(data, sample):
""" iterate over clustS files to get data """
## use existing sample cluster path if it exists, since this
## func can be used in step 4 and that can occur after merging
## assemblies after step3, and if we then referenced by data.dirs.clusts
## the path would be broken.
if sample.files.clusters:
pass
else:
## set cluster file handles
sample.files.clusters = os.path.join(
data.dirs.clusts, sample.name + ".clustS.gz")
## get new clustered loci
fclust = data.samples[sample.name].files.clusters
clusters = gzip.open(fclust, 'rt')
pairdealer = izip(*[iter(clusters)] * 2)
## storage
depths = []
maxlen = []
## start with cluster 0
tdepth = 0
tlen = 0
## iterate until empty
while 1:
## grab next
try:
name, seq = next(pairdealer)
except StopIteration:
break
## if not the end of a cluster
#print name.strip(), seq.strip()
if name.strip() == seq.strip():
depths.append(tdepth)
maxlen.append(tlen)
tlen = 0
tdepth = 0
else:
try:
tdepth += int(name.strip().split("=")[-1][:-2])
tlen = len(seq)
except:
print(name)
## return
clusters.close()
return np.array(maxlen), np.array(depths)
def sample_cleanup(data, sample):
""" stats, cleanup, and link to samples """
## get maxlen and depths array from clusters
maxlens, depths = get_quick_depths(data, sample)
## Test if depths is non-empty, but just full of zeros.
if not depths.max():
print(" no clusters found for {}".format(sample.name))
return
else:
## store which min was used to calculate hidepth here
sample.stats_dfs.s3["hidepth_min"] = (
data.paramsdict["mindepth_majrule"])
# If our longest sequence is longer than the current max_fragment_len
# then update max_fragment_length. For assurance we require that
# max len is 4 greater than maxlen, to allow for pair separators.
hidepths = depths >= data.paramsdict["mindepth_majrule"]
maxlens = maxlens[hidepths]
## Handle the case where there are no hidepth clusters
if maxlens.any():
maxlen = int(maxlens.mean() + (2. * maxlens.std()))
else:
maxlen = 0
if maxlen > data._hackersonly["max_fragment_length"]:
data._hackersonly["max_fragment_length"] = maxlen + 4
## make sense of stats
keepmj = depths[depths >= data.paramsdict["mindepth_majrule"]]
keepstat = depths[depths >= data.paramsdict["mindepth_statistical"]]
## sample summary stat assignments
sample.stats["state"] = 3
sample.stats["clusters_total"] = depths.shape[0]
sample.stats["clusters_hidepth"] = keepmj.shape[0]
## store depths histogram as a dict. Limit to first 25 bins
bars, bins = np.histogram(depths, bins=range(1, 26))
sample.depths = {int(i): int(v) for i, v in zip(bins, bars) if v}
## sample stat assignments
## Trap numpy warnings ("mean of empty slice") printed by samples
## with few reads.
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=RuntimeWarning)
sample.stats_dfs.s3["merged_pairs"] = sample.stats.reads_merged
sample.stats_dfs.s3["clusters_total"] = depths.shape[0]
try:
sample.stats_dfs.s3["clusters_hidepth"] = (
int(sample.stats["clusters_hidepth"]))
except ValueError:
## Handle clusters_hidepth == NaN
sample.stats_dfs.s3["clusters_hidepth"] = 0
sample.stats_dfs.s3["avg_depth_total"] = depths.mean()
sample.stats_dfs.s3["avg_depth_mj"] = keepmj.mean()
sample.stats_dfs.s3["avg_depth_stat"] = keepstat.mean()
sample.stats_dfs.s3["sd_depth_total"] = depths.std()
sample.stats_dfs.s3["sd_depth_mj"] = keepmj.std()
sample.stats_dfs.s3["sd_depth_stat"] = keepstat.std()
## Get some stats from the bam files
## This is moderately hackish. samtools flagstat returns
## the number of reads in the bam file as the first element
## of the first line, this call makes this assumption.
if not data.paramsdict["assembly_method"] == "denovo":
## shorter names
mapf = os.path.join(
data.dirs.refmapping, sample.name + "-mapped-sorted.bam")
umapf = os.path.join(
data.dirs.refmapping, sample.name + "-unmapped.bam")
## get from unmapped
cmd1 = [ip.bins.samtools, "flagstat", umapf]
proc1 = sps.Popen(cmd1, stderr=sps.STDOUT, stdout=sps.PIPE)
result1 = proc1.communicate()[0]
## get from mapped
cmd2 = [ip.bins.samtools, "flagstat", mapf]
proc2 = sps.Popen(cmd2, stderr=sps.STDOUT, stdout=sps.PIPE)
result2 = proc2.communicate()[0]
## store results
## If PE, samtools reports the _actual_ number of reads mapped, both
## R1 and R2, so here if PE divide the results by 2 to stay consistent
## with how we've been reporting R1 and R2 as one "read pair"
if "pair" in data.paramsdict["datatype"]:
sample.stats["refseq_unmapped_reads"] = int(result1.split()[0]) / 2
sample.stats["refseq_mapped_reads"] = int(result2.split()[0]) / 2
else:
sample.stats["refseq_unmapped_reads"] = int(result1.split()[0])
sample.stats["refseq_mapped_reads"] = int(result2.split()[0])
unmapped = os.path.join(data.dirs.refmapping, sample.name + "-unmapped.bam")
samplesam = os.path.join(data.dirs.refmapping, sample.name + ".sam")
for rfile in [unmapped, samplesam]:
if os.path.exists(rfile):
os.remove(rfile)
# if loglevel==DEBUG
log_level = ip.logger.getEffectiveLevel()
if not log_level == 10:
## Clean up loose files only if not in DEBUG
##- edits/*derep, utemp, *utemp.sort, *htemp, *clust.gz
derepfile = os.path.join(data.dirs.edits, sample.name + "_derep.fastq")
mergefile = os.path.join(data.dirs.edits, sample.name + "_merged_.fastq")
uhandle = os.path.join(data.dirs.clusts, sample.name + ".utemp")
usort = os.path.join(data.dirs.clusts, sample.name + ".utemp.sort")
hhandle = os.path.join(data.dirs.clusts, sample.name + ".htemp")
clusters = os.path.join(data.dirs.clusts, sample.name + ".clust.gz")
for rfile in [derepfile, mergefile, uhandle, usort, hhandle, clusters]:
if os.path.exists(rfile):
os.remove(rfile)
# optimize speed of this next
build_clusters_from_cigars(data, sample)
### Write each out to a sam file...
# newconsensus()
#
data._este = data.stats.error_est.mean()
data._esth = data.stats.hetero_est.mean()
clusters = open(os.path.join(data.dirs.clusts, "{}.clustS.gz".format(sample.name)), 'r')
clusters.read()
# plan to fill an h5 for this sample
tmp5 = consenshandle.replace("_tmpcons.", "_tmpcats.")
with h5py.File(tmp5, 'w') as io5:
io5.create_dataset("cats", (optim, maxlen, 4), dtype=np.uint32)
io5.create_dataset("alls", (optim, ), dtype=np.uint8)
io5.create_dataset("chroms", (optim, 3), dtype=np.int64)
## local copies to use to fill the arrays
catarr = io5["cats"][:]
nallel = io5["alls"][:]
refarr = io5["chroms"][:]
### Step 6 for refmapped pairs:
# 1. convert all sams to bams and make a merged mapped-sorted.bam
# 2. get overlapping regions with bedtools_merge()
# 3. pull consens reads in aligned regions with bamfile.fetch()
# 4. store the consensus sequence in h5.
# 5. store the variants in h5.
# 6. store the depth of variants in h5.
# 7.
# get all regions with reads. Generator to yield (str, int, int)
fullregions = bedtools_merge(data, sample).strip().split("\n")
regions = (i.split("\t") for i in fullregions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
# access reads from bam file using pysam
samfile = AlignmentFile(
os.path.join(data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name)),
'rb')
reg = next(regions)
ref = get_ref_region(data.paramsdict["reference_sequence"], *reg)
reads = samfile.fetch(*reg)
# match paired reads together in a dictionary
rdict = {}
for read in reads:
rdict[read.qname] = read
# sort keys by derep number
keys = sorted(
rdict.keys(),
key=lambda x: int(x.split("=")[-1]), reverse=True)
# build the cluster based on map positions, orientation, cigar
clust = []
for key in keys:
r1 = rdict[key]
aref = np.array(list(ref[1]))
arr1 = np.zeros(aref.size, dtype="U1")
arr1.fill("-")
# how far ahead of the start does this read begin
seq = cigared(r1.seq, r1.cigar)
start = r1.reference_start - reg[1]
arr1[start:start + len(seq)] = list(seq)
aseq = "".join(arr1)
ori = "+"
if r1.is_reverse:
ori = "-"
derep = r1.qname.split("=")[-1]
rname = "{}:{}-{};size={};{}".format(*reg, derep, ori)
clust.append("{}\n{}".format(rname, aseq))
clust
['MT:96809-96900;size=18;+\nTGCAGCTGCGGTAGTTAACGAACAGCCTGTCTTGTCTAAAGGGTTAAAAATCAGGTCCGGTGTACAGGCGACGATAGAGAACCCCGGCCTA', 'MT:96809-96900;size=1;+\nTGCAGCTGCGGTAGTTAACGAACAGCCTGTCTTGTCTAAAGGGTTAAAAATCAGGTCCGGTGTACAAGCGACGATAGAGAACCCCGGCCTA', 'MT:96809-96900;size=1;+\nTGCAGCTGCGGTAGTTAACGAACAGCCTGTCTTGTCTAAAGGGTTAAAAATCAGGTCCGGTGTACAGGCGACGACAGAGAACCCCGGCCTA']
rdict
{'37013bcb5c4c2a2541dacf4f2e807af0;size=16': [<pysam.libcalignedsegment.AlignedSegment at 0x7fa80b8110a8>, None]}
mapping_reads(data, sample, 2)
--------------------------------------------------------------------------- IPyradError Traceback (most recent call last) <ipython-input-16-193c6a7f0e62> in <module>() ----> 1 mapping_reads(data, sample, 2) ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in mapping_reads(data, sample, nthreads) 1498 # -O = Output file format, in this case bam 1499 # -o = Output file name -> 1500 1501 # (cmd5) samtools bam2fq -v 45 [in.bam] 1502 # -v45 set the default qscore arbirtrarily high IPyradError: None
s3.data.run("5")
Assembly: 4-refpairtest [####################] 100% 0:00:00 | calculating depths | s5 | [####################] 100% 0:00:00 | chunking clusters | s5 | [####################] 100% 0:00:24 | consens calling | s5 |
subsamples = list(s3.data.samples.values())
subsamples.sort(key=lambda x: x.stats.clusters_hidepth, reverse=True)
jobs = {}
from ipyrad.assemble.jointestimate import *
optim(data, sample)
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-8-9ad4f842e7b0> in <module>() 1 from ipyrad.assemble.jointestimate import * ----> 2 optim(data, sample) ~/Documents/ipyrad/ipyrad/assemble/jointestimate.py in optim(data, sample) 278 try: 279 ## get array of all clusters data --> 280 stacked = stackarray(data, sample) 281 282 ## get base frequencies ~/Documents/ipyrad/ipyrad/assemble/jointestimate.py in stackarray(data, sample) 254 dtype=np.uint64).T 255 --> 256 stacked[nclust, :catg.shape[0], :] = catg 257 nclust += 1 258 IndexError: index 499 is out of bounds for axis 0 with size 499
sample, _, _, nhidepth, maxlen = recal_hidepth(data, sample)
(<ipyrad.core.sample.Sample at 0x7ff440145898>, 499, 495, 499, 495)
data
concat_multiple_edits(data, sample)
merge_pairs_with_vsearch(data, sample, True)
merge_end_to_end(data, sample, True, True)
dereplicate(data, sample, 2)
cluster(data, sample, 2, 1)
build_clusters(data, sample, 5)
muscle_chunker(data, sample)
for idx in range(10):
handle = os.path.join(s3.data.tmpdir,
"{}_chunk_{}.ali".format(sample.name, idx))
align_and_parse(handle, 5, 0)
reconcat(data, sample)
s3.run()
[####################] 100% 0:00:01 | concatenating | s3 | [####################] 100% 0:00:01 | join unmerged pairs | s3 | [####################] 100% 0:00:00 | dereplicating | s3 | [####################] 100% 0:00:00 | splitting dereps | s3 | [####################] 100% 0:00:02 | mapping reads | s3 | [####################] 100% 0:00:37 | building clusters | s3 |
maxlens, depths = get_quick_depths(data, sample)
maxlens, depths
(array([460, 475, 470, 482, 468, 463, 488, 445, 474, 490, 446, 469, 482, 457, 457, 470, 477, 445, 442, 468, 486, 447, 473, 460, 484, 467, 482, 442, 446, 447, 458, 471, 455, 476, 480, 443, 461, 457, 479, 452, 444, 464, 473, 477, 448, 446, 453, 488, 490, 482, 471, 483, 480, 450, 456, 482, 454, 462, 457, 456, 455, 463, 447, 488, 457, 446, 472, 450, 472, 482, 472, 451, 491, 479, 469, 446, 489, 486, 463, 463, 489, 476, 458, 484, 463, 458, 443, 479, 488, 469, 470, 474, 454, 447, 447, 489, 451, 449, 453, 491, 484, 471, 483, 481, 483, 472, 468, 457, 464, 446, 490, 462, 452, 461, 457, 474, 469, 481, 458, 461, 446, 472, 455, 455, 488, 471, 482, 489, 466, 461, 473, 477, 466, 488, 461, 487, 453, 466, 450, 472, 458, 448, 490, 484, 476, 445, 488, 451, 484, 486, 463, 469, 452, 462, 453, 476, 448, 445, 468, 453, 463, 469, 487, 445, 453, 469, 447, 444, 464, 488, 468, 448, 461, 465, 483, 446, 447, 473, 448, 448, 444, 481, 464, 474, 464, 459, 461, 458, 451, 449, 468, 445, 491, 443, 448, 475, 450, 454, 454, 467, 478, 452, 451, 468, 480, 468, 464, 484, 462, 484, 475, 457, 463, 453, 473, 457, 486, 469, 485, 462, 464, 442, 475, 473, 470, 482, 444, 456, 479, 472, 477, 446, 443, 471, 473, 489, 467, 463, 476, 481, 448, 450, 460, 452, 458, 479, 444, 457, 489, 488, 444, 453, 472, 461, 481, 451, 450, 491, 473, 480, 491, 489, 468, 458, 472, 460, 450, 462, 447, 457, 465, 452, 468, 475, 465, 491, 447, 453, 448, 471, 490, 472, 446, 450, 478, 468, 455, 470, 454, 464, 479, 472, 456, 475, 479, 446, 474, 470, 474, 446, 491, 479, 470, 457, 457, 482, 453, 468, 457, 454, 453, 445, 489, 477, 451, 478, 485, 456, 485, 449, 485, 478, 468, 462, 454, 486, 482, 471, 488, 444, 485, 450, 485, 449, 444, 453, 482, 484, 472, 456, 474, 461, 487, 468, 482, 480, 459, 443, 489, 447, 444, 453, 447, 470, 486, 442, 442, 487, 445, 471, 480, 475, 450, 456, 473, 468, 455, 452, 491, 470, 479, 469, 469, 442, 489, 451, 476, 464, 477, 487, 484, 468, 454, 462, 460, 476, 442, 476, 489, 481, 477, 485, 456, 453, 476, 471, 473, 453, 480, 490, 479, 488, 470, 483, 483, 450, 451, 459, 469, 456, 461, 458, 451, 450, 446, 482, 446, 448, 472, 489, 485, 489, 449, 477, 488, 452, 460, 450, 471, 444, 452, 456, 452, 467, 455, 489, 481, 474, 463, 456, 475, 453, 445, 487, 481, 445, 476, 476, 483, 456, 455, 444, 443, 452, 478, 488, 452, 471, 452, 483, 484, 490, 457, 487, 464, 455, 480, 491, 475, 449, 489, 447, 468, 468, 485, 475, 484, 455, 444, 450, 466, 448, 442, 459, 488, 482, 455, 474, 480, 482, 474, 484, 488, 469, 475, 481, 482, 446, 473]), array([17, 14, 17, 20, 25, 21, 18, 25, 21, 24, 22, 23, 21, 22, 23, 20, 17, 22, 19, 20, 25, 26, 17, 21, 19, 25, 19, 21, 20, 16, 21, 18, 22, 22, 20, 21, 22, 23, 20, 19, 19, 17, 16, 27, 21, 20, 17, 19, 20, 18, 15, 20, 24, 20, 19, 19, 20, 23, 20, 23, 20, 19, 20, 23, 22, 25, 17, 27, 22, 26, 22, 19, 21, 18, 21, 20, 19, 20, 25, 19, 19, 16, 23, 19, 17, 18, 18, 21, 16, 19, 26, 22, 22, 23, 18, 22, 21, 24, 18, 23, 19, 18, 20, 18, 21, 22, 21, 22, 19, 19, 18, 20, 17, 19, 22, 25, 20, 21, 19, 21, 19, 20, 24, 19, 22, 22, 20, 19, 21, 20, 17, 21, 19, 19, 22, 19, 19, 16, 19, 22, 18, 19, 15, 18, 18, 20, 20, 25, 17, 21, 19, 18, 18, 22, 21, 21, 19, 21, 14, 25, 19, 17, 20, 21, 14, 20, 21, 20, 18, 25, 27, 16, 19, 18, 21, 22, 15, 18, 19, 21, 21, 19, 17, 20, 19, 20, 22, 24, 18, 18, 18, 25, 19, 19, 19, 17, 26, 18, 20, 22, 20, 22, 22, 21, 21, 21, 21, 18, 18, 19, 19, 19, 25, 17, 22, 21, 17, 18, 18, 18, 20, 22, 17, 19, 19, 20, 25, 19, 25, 18, 17, 25, 15, 19, 19, 23, 19, 22, 23, 17, 15, 22, 16, 20, 24, 23, 19, 18, 24, 19, 20, 20, 19, 19, 18, 26, 18, 18, 23, 20, 18, 21, 22, 25, 23, 16, 19, 25, 24, 22, 21, 20, 20, 18, 25, 17, 18, 14, 17, 17, 18, 19, 17, 19, 12, 15, 24, 16, 19, 15, 18, 20, 21, 17, 19, 21, 26, 19, 22, 19, 19, 21, 29, 19, 22, 21, 22, 19, 23, 17, 25, 20, 21, 20, 23, 18, 21, 19, 27, 14, 18, 16, 19, 21, 17, 25, 17, 19, 21, 20, 23, 22, 18, 24, 20, 20, 21, 25, 15, 18, 17, 21, 27, 20, 24, 23, 21, 20, 26, 20, 23, 20, 21, 23, 21, 19, 18, 21, 19, 21, 15, 22, 22, 23, 21, 19, 20, 14, 21, 25, 18, 20, 13, 17, 26, 23, 19, 18, 14, 22, 25, 21, 17, 23, 23, 17, 20, 19, 22, 20, 21, 18, 17, 17, 19, 26, 19, 18, 21, 20, 19, 22, 15, 20, 14, 19, 22, 23, 20, 17, 23, 16, 17, 18, 18, 20, 16, 16, 14, 22, 19, 19, 22, 16, 22, 24, 18, 21, 22, 18, 22, 20, 25, 17, 16, 25, 27, 20, 22, 22, 23, 21, 21, 23, 15, 17, 22, 21, 23, 17, 16, 26, 20, 17, 22, 24, 21, 21, 24, 21, 20, 21, 21, 21, 19, 24, 18, 22, 23, 21, 15, 18, 21, 27, 14, 17, 21, 23, 26, 23, 25, 22, 20, 20, 18, 19, 20, 21, 25, 13, 21, 17, 19, 18, 21, 19, 17, 21, 19]))
data = s3.data
samples = list(s3.data.samples.values())
sample = samples[1]
sample = samples[1]
concat_multiple_edits(data, sample)
merge_end_to_end(data, sample, False)
dereplicate(data, sample, 2)
split_endtoend_reads(data, sample)
mapping_reads(data, sample, 2)
build_ref_cigars(data, sample)
#samtools mpileup -uf ref.fa aln1.bam aln2.bam | bcftools view -bvcg - > var.raw.bcf
! samtools mpileup -ur /home/deren/Dropbox/opbox/Maud/lgeorge.genome.fa
#regions = bedtools_merge(data, sample).strip().split("\n")
fullregions = bedtools_merge(data, sample).strip().split("\n")
regions = (i.split("\t") for i in fullregions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
def get_ref_region(reference, contig, rstart, rend):
"returns the reference sequence over a given region"
cmd = [
ip.bins.samtools, 'faidx',
reference,
"{}:{}-{}".format(contig, rstart + 1, rend),
]
stdout, err = sps.Popen(cmd, stdout=sps.PIPE).communicate()
name, seq = stdout.decode().split("\n", 1)
listseq = [name, seq.replace("\n", "")]
return listseq
def build_ref_cigars(data, sample):
# get all regions with reads. Generator to yield (str, int, int)
#fullregions = bedtools_merge(data, sample).strip().split("\n")
regions = (i.split("\t") for i in fullregions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
# access reads from bam file using pysam
samfile = AlignmentFile(
os.path.join(data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name)),
'rb')
# iterate over all regions
out = open("test.clustS", 'w')
idx = 0
clusters = []
for reg in regions:
ref = get_ref_region(data.paramsdict["reference_sequence"], *reg)
reads = samfile.fetch(*reg)
# match paired reads together in a dictionary
rdict = {}
for read in reads:
if read.qname not in rdict:
rdict[read.qname] = [read, None]
else:
rdict[read.qname][1] = read
# sort keys by derep number
keys = sorted(
rdict.keys(),
key=lambda x: int(x.split("=")[-1]), reverse=True)
# build the cluster based on map positions, orientation, cigar
clust = []
for key in keys:
r1, r2 = rdict[key]
if r1 and r2:
aref = np.array(list(ref[1]))
arr1 = np.zeros(aref.size, dtype="U1")
arr2 = np.zeros(aref.size, dtype="U1")
arr1.fill("-")
arr2.fill("-")
try:
# how far ahead of the start does this read begin
seq = cigared(r1.seq, r1.cigar)
start = r1.reference_start - reg[1]
arr1[start:start + len(seq)] = list(seq)
seq = cigared(r2.seq, r2.cigar)
start = r2.reference_start - reg[1]
arr2[start:start + len(seq)] = list(seq)
arr3 = join_arrays(arr1, arr2)
pairseq = "".join(arr3)
derep = r1.qname.split("=")[-1]
clust.append("{}\n{}".format("{}:{}-{};size={}"
.format(*reg, derep), pairseq))
except ValueError:
print(reg)
clusters.append("\n".join(clust))
idx += 1
if not idx % 1000:
out.write("\n//\n//\n".join(clusters))
out.close()
def build_ref_cigars(data, sample):
# get all regions with reads. Generator to yield (str, int, int)
#fullregions = bedtools_merge(data, sample).strip().split("\n")
regions = (i.split("\t") for i in fullregions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
# access reads from bam file using pysam
samfile = AlignmentFile(
os.path.join(data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name)),
'rb')
# iterate over all regions
opath = os.path.join(
data.dirs.refmapping, "{}.clustS.gz".format(sample.name))
out = gzip.open(opath, 'w')
idx = 0
clusters = []
for reg in regions:
ref = get_ref_region(data.paramsdict["reference_sequence"], *reg)
reads = samfile.fetch(*reg)
# match paired reads together in a dictionary
rdict = {}
for read in reads:
if read.qname not in rdict:
rdict[read.qname] = [read, None]
else:
rdict[read.qname][1] = read
# sort keys by derep number
keys = sorted(
rdict.keys(),
key=lambda x: int(x.split("=")[-1]), reverse=True)
# build the cluster based on map positions, orientation, cigar
clust = []
for key in keys:
r1, r2 = rdict[key]
if r1 and r2:
aref = np.array(list(ref[1]))
arr1 = np.zeros(aref.size, dtype="U1")
arr2 = np.zeros(aref.size, dtype="U1")
arr1.fill("-")
arr2.fill("-")
# how far ahead of the start does this read begin
seq = cigared(r1.seq, r1.cigar)
start = r1.reference_start - reg[1]
arr1[start:start + len(seq)] = list(seq)
seq = cigared(r2.seq, r2.cigar)
start = r2.reference_start - reg[1]
arr2[start:start + len(seq)] = list(seq)
arr3 = join_arrays(arr1, arr2)
pairseq = "".join(arr3)
derep = r1.qname.split("=")[-1]
rname = "{}:{}-{};size={}".format(*reg, derep)
clust.append("{}\n{}".format(rname, pairseq))
clusters.append("\n".join(clust))
idx += 1
if not idx % 100:
out.write("\n//\n//\n".join(clusters).encode())
out.close()
c = build_ref_cigars(data, sample)
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-550-1567c51d5c9b> in <module>() ----> 1 c = build_ref_cigars(data, sample) <ipython-input-549-0d680f391a25> in build_ref_cigars(data, sample) 20 clusters = [] 21 for reg in regions: ---> 22 ref = get_ref_region(data.paramsdict["reference_sequence"], *reg) 23 reads = samfile.fetch(*reg) 24 ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in get_ref_region(reference, contig, rstart, rend) 2150 """ 2151 inp = os.path.join(data.tmpdir, "{}_derep.fastq".format(sample.name)) -> 2152 out1 = os.path.join(data.tmpdir, "{}_R1-tmp.fastq".format(sample.name)) 2153 out2 = os.path.join(data.tmpdir, "{}_R2-tmp.fastq".format(sample.name)) 2154 ~/miniconda3/lib/python3.6/subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors) 707 c2pread, c2pwrite, 708 errread, errwrite, --> 709 restore_signals, start_new_session) 710 except: 711 # Cleanup if the child failed starting. ~/miniconda3/lib/python3.6/subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session) 1296 errpipe_data = bytearray() 1297 while True: -> 1298 part = os.read(errpipe_read, 50000) 1299 errpipe_data += part 1300 if not part or len(errpipe_data) > 50000: KeyboardInterrupt:
"\n//\n//\n".join(c).encode()
b'Contig0:3481-3748;size=3\nCATGAGGTCCTTAGGGAGGGAATCAACGAAGTAGCCAAACACTTGCTGAATGTTGAAATGAGCCCCTACAG------------------------------------------------------------------------------------------------------------------------TTCCAGGGTCACACCAGCCAAGGCACTGTTGCAGTCACTTAACAAAGCAGTGTGATTCCCAGTTGGCCACAGAATT\n//\n//\nContig0:4544-5088;size=1\nCATGCTTGTTTAAAGTTGTGCAGTGCTCTTTTCTGGCAGGGGATTGGTCCTGCTTTGAGCAGGGGGTTGGA--------------------------------------------------------------------------------------------------------------------------------------TGGACTGGGAGCCCCTCTGTCAGCTCCCTGCTCCCCTAAGTTCCCTGTGCTGCAGTCGCCCAGCAGGCTATCAATT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:4544-5088;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTGCTGCTCCTGCCCTCTGCCTTGGAGCTGCTCCCAGAGACTCCTGCTTGCTGTGCAGGGAGGAAAGG------------------------------------------------------------------------------------GAGGGAGAGAGACAGAGAGAGCTTGGGGCAGCAGCTGCTGTCTCAACTTCCTGATCCACTGACAAACAATGCAATT\nContig0:4544-5088;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTGCTGCTCCTGCCCTCTGCCTTGGAGCTGCTCCCAGAGACTCCTGCTTGCTGCGCAGGGAGGAAAGG------------------------------------------------------------------------------------GAGGGAGAGAGACAGAGAGAGCTTGGGGCAGCAGCTGCTGTCTCAACTTCCTGATCCACTGACAAACAATGCAATT\nContig0:4544-5088;size=1\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGGTGTCCCCCTCCTCCCTGCTCCTGCACCCTGCTTACCTCTTCTCCATATAGAGCAGGGAGGGGACAC-GAGGGAGAGAGACAGAGAGAGCTTGGGGCAGCAGCTGCTGTCTCAACTTCCTGATCCACTGACAAACAATGCAATT\n//\n//\nContig0:7193-7952;size=1\nAATTGATGGGACCCTGGAACATTTCTAGCTTGTACTGATTGTTTTGGGATTTTTTTGTTTGTTCTCTGGTTTCAATAGCAGTTGGGTGCCCAGATACCTGGGGGGAGCAGTTGGAGGGGGGTTCGCCTTCCTCTGCAGCATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:7193-7952;size=1\n------------------------------------------------------------------------------------------------------------------------------------------CATGGAGCACAGGACATTTGCAGGTTTAAACTAGTGTAAATGGTGAATCCTCTGTGACTTAAAGTCTTTAA----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAGCAGCAAATAGATGGGGTGAGGGCAGGTGAATGGAGCGACTCCAATCAGATCATGGGTTTTAGCAGACTAATT--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:7193-7952;size=1\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTAACATTGCTGTGCGATTCCTGGACCACTGCAGAATTCAGTGAAATCTAGGGGTCTTGGCTGCCTAAATACAT------------------------------------------------------------CCTAGCACCAGGGGACCTCCAGGTGCTACTGTAATATACAAGGGGATGATGATGATAGGATGCAGCACATG\n//\n//\nContig0:13327-13845;size=4\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGCTTTCTCATTCACACACTTTTCATAGGGTGCTACAGTCATGCATCCTGATCCATACTTCTGTTACCC---------------------------------------------------------------------------------------------CTGCCCCCTCCCGTGGACGGACTCCGGGTGGGTGTATCGCAGGCTGGGAGCATACTGAAGGTGGACACATG\nContig0:13327-13845;size=1\nCCAGCCAGGGCTTTGTCAAGCCTGACCTTAAAAACCTCTAAGGAAGGAGATTCCACCACCTCCCTAG----------------------------------------------------------------------------------------------------------------------TGAGAACAGTCTAGATCCAACCTCTTTGGAACCCCCTTTCAGGTAGTTCAAAGTAGCTATCAAAT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n//\n//\nContig0:14393-14579;size=1\nCATGCTGGGCCAGATCCACCCCCCTGGGGAAGCCCAGGGGATTGCACTAGGGATCAGGTTTGCCCCTT---------------------------------------------TTTAAATGAGTCCTTATTTGACTCTGGGCTCACCAGGCCACCTCCAAGCTGTTCCTTGGCTGGCTCCGTAATT\n//\n//\nContig0:14907-15026;size=1\nCATGATCTTGGACACATCTGTTCACCTCTCTTCTTCCCCTCCACGCCCCACCACCCAAAAAAATCTGTGAAATGGTAGGGTTGATGCTGCTTTACCTTCTGGGGGTGCTTGTGAGAATT\n//\n//\nContig0:15377-15552;size=1\nAATTGCCCTGCTTGCTACCCCTTAACACCAGCCCTGGGTTTTACATGCAGAAAACAGGTGTTGTGGCACAGG--------------------------------TGATCTCGGCAGGCTGCTGTTGGTCCATGTGCCATGGTCCAGCAGCACTCCTGGACTTGCAAACATTCATG\n//\n//\nContig0:16736-16933;size=1\nAAATTCTCAGGGCTTTGCTATGCAGGAGATCAGACTAGATTA------------------------------------------------------------------------------------GCAGGAAAATCTGTTACCTTTGTGTTTTCCCTGGGAGAGGTGGGGGCGCCGCTGGCTCCGACTCCGGCATG\n//\n//\nContig0:19843-20061;size=1\nCATGTGACTTTCCCTGACGGGGAAAACCACTGTGACCTGACCAGAGGGCCAAGCCATGAAACGGGAGCAGC-----------------------------------------------------------------------CAGCCAGAAGGAGGTGCTCCAGCTCTGAGTAGAGCTCCTCCAAGGATGAATAGAGATCTGGACTGGAAGCTAAATT\nContig0:19843-20061;size=1\nCATGTGACTTTCCCTGACGGGGAAAACCACTGTGACCTGACCAGAGGGCCAAGCCATGAAACGGG-----------------------------------------------------------------------------CAGCCAGAAGGAGGTGCTCCAGCTCTGAGTAGAGCTCCTCCATGGATGACTAGAGATCTGGACTGGAAGCTAAATT\nContig0:19843-20061;size=1\nCATGTGACTTTCCCTGACGGGGAAAACCACTGTGACCTGACCAGAGGGCCAAGCCATGAAACGGGAGCAGC-----------------------------------------------------------------------CAGCCAGAAGGAGGTGCTCCAGCTCTGAGTAGAGCTCCTCCATGGATGACTAGAGATCTGGACTGGAAGCTAAATT\n//\n//\nContig0:22414-22496;size=1\nAATTCTCGATCATCACGTGCACTGGGTGTGGCCGGATTTCCGAGATGGATGGAACTGGGTGAGCCAAATGAATCCCTCCATG\n//\n//\nContig0:22928-23117;size=1\nCATGGGCCAGGATTCCAAAGGTTAACACCCCGGCAGCAGAGGTGGGGAGCTGAGAGCACTCAGCACCTCGA--------------------------------------------GGACTGCACTCTATATCCACCCTTGAATAGGAACCTACATGAAATGGAGTCGCTCTCCAGATGTTTGGGGAATT\n//\n//\nContig0:23425-23627;size=1\nCATGCATAAGGTCTCTAAATCCCACCACTGTTCTGATCTTTGTTTTGTAAAGTGCCCATGAGCGTAGCATC-------------------------------------------------------------------------------------GGAGAGGGTACGCTGTATTCTTCAGACATAACGGCTCGATTAATT-\nContig0:23425-23627;size=1\nCATGCATAAGTTCTCTAAATCCCACCACTGTTCTGATCTTTGTTTTGTAAAGTGCCCATGAGCGTAGCATC------------------------------------------------------------------------------------------GGGTACGCTGTATTCTTCAGACATAACGGCTCGATTAATTA\nContig0:23425-23627;size=1\n--------------------------------------------------------CATGAGCGTAGCATCAGGCCCAAATGACAGGGCTGATATGCCACACACTGGTAAACTCTGTCCCATTTCTCATTCACTCTGCATTCAGTGGTGTAATGTGGGAGAGGGTACGCTGTATTCTTCAGACATAACGGCTCGATTAATT-\n//\n//\nContig0:24391-24908;size=3\n--------------------------AATTATTATGTACATAGTTTCGTCCTATTCAGTGTCTACTCAGCGCTTCTCGGCTTGTCTCTTGTATTCATTAAAT-------------------------------------------------------------------------------------------------------------------CGCTCACTGCTCAGCAGTTCGATGGAAAAAAATCTAGATGAGAGTGCAAAAAATGTATCTTTAGTGACATG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:24391-24908;size=1\nAATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACATAGTTTAGTCCTATTCAGTGTCTACTCAGCGCT----------------------------------------------------------------------TGATGGGCTCCATTTGCCCTGATAGCGTTTCTCCATTGTTGCAATGTCCTGGTGAAATCGCTCGCC--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:24391-24908;size=1\nAATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACATAGTTTAGTCCTATTCAGTGTCTACTCAGCGCT------------------------------------------------------------------------------------------------------------------------------------------------CGCTCACTACTCCACAGTTCAGTGGAAAAAAATCTAGATGAGAGTGCAAAAAATCTATCTTTAGTGACATG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:24391-24908;size=1\nAATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACGTAGTTTAGTCCTATTCAATGTCTACTCAGCGCT----------------------------------------------------------------------TGATGGGCTCCATTTGCCCTGATAGCGTTTCTCCAATGTTGCAATGTCCTGGTGAAATCGCTCGCCATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:24391-24908;size=1\nAATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACATAGTTTAGTCCTATTCAGTGTCTACTCAGCGCT--------------------------------------------------------------------------GGGCTCCATTTGCCCTGATAGTGTTTCTCCATTGTTGCAATGTCCTGGTGAAATCGCTCACCGTGCTC--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:24391-24908;size=1\nAATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACATAGTTTAGTCCTATTCAGTGTCTACTCAGCGCT---------------------------------------------------------------------TTGATGGGCTCCATTTGCCCTGATAGCGTTTCTCCATTCCTGCAATGTCCTGGTGAAATCGCACGCCATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:24391-24908;size=1\n-----------------------------------------AATTTAGTCCTATTCGGGGTCTACTCAGCGCTTCTTGGCTTGTCTCTTGTATTCATTAAATGGAGCATCTCTTGTC----------------------------------------------------------------------------------------------------CGCTCACCGCTCCGCAGTTCGGTGGAAAAAAATCTAGATGAGAGTGCAAAAAATGTATCTTTAGTGACATG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:24391-24908;size=1\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGCCGTCTTTTCCTTGCCATGCAGTGCATGGTCAAATGCATCGTCTCGAAGAAGTTCACCAATCTGAGGACCAACAAAGACACCTTCCTTTGTCTTAGCTTCACTTAACCTTGGAAATT\n//\n//\nContig0:25290-25837;size=1\nAATTGATGGCAAAACATTGCCATTATGCAGCAAAACAACTTTAAGACTCGTCTTCGATGAATCAATGAACAGTCTC----------------------------------------------------TGTTGCAGGCTACTAGATCACCTTCCATGAAGAAGAATGGGACAAGATCCTTTTGATGGTCACGGAACATG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:25290-25837;size=1\nAATTGATGGCAAAACATTGCCATTATGCAGCAAAACAGCTTTAAGACTTGTCTTCGATGAATCAATGAACAGTCTC----------------------------------------------------TATTGCAGGCTCCAAGATCACCTTCCATGAAGAAGAATGGGACAAGATCCTTTTGATGGTCACGGAACATG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:25290-25837;size=1\n----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTAATAAAGTTTGACTACATTTATTTCAGAAGCATTTTGGCTGTAGAGCAGCGAAATCAAAGCTGTGCCGATGC------------------------------------------------------------------------------------------AAGCAAATAGAGACATTTTCAGGTCTGCTGGCTGGTTAGGCTCCATAGTCCTACGCAAGGATGTGATCATG\n//\n//\nContig0:28260-28458;size=1\nCATGCGTCTGCAGAAAAGTACGGAGACTGGGCAAAGGCTGTGAGAAAGAAGAACGTTTGGGGCTTGTCCAA---------------------------------------------------ATCCAGTTCCTCTGCCTACTCTGGGACATTTATATCACAGAGTAGGCACTGTAATAAAAAGCAAGCAATGAGAATT\n//\n//\nContig0:28945-29088;size=1\nAATTCAAGAACTCAAAGGGACACTATCACCTTAAACTAGCAGGCACACACAAGAGGCAGTAGCTAGGTTGATGGGAGGATGCTCAGAGGTGTGGATTCTTTCATCGATCTAGCAGTGCCCACACTAGGGCTTAGGGTGGCATG\n//\n//\nContig0:30099-30713;size=7\n--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCCAACTAGTCCAGATGGCTGCATTTCCCCATACACCCACTACTCCTGTTTGGAAGCCTCTCCATCACTTCCT---------------------------------------------------------------------------------------------------------------------------TGGTAGGGGCTGAAGGCTCTGGTCTGGGGTGGGCTTGATCTCTTGTACAGAAAACAGAGCTGAACCACATG\nContig0:30099-30713;size=1\nAATTAATGCTAGTCTTAATGAGAGTCTATTTCAACTCTTATGTAGCTGACAGGCTTTCTCAGGGCATGGCTGCTGT-------------------------------------------------------------------------------------------------TAGTTCCTTGAAGAATCAGGTTTTCCCTATTTTTTTTTAATCTCCTGTGTCAACTGGCGTCTTGGACATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:30099-30713;size=1\nAATTAATGATAGTCTTAATGAGAGTCTATTTCAACTCTTATGTAGCTGACAGGCTTTCTCAGGGCATGGCTGCTGT------------------------------------------------------------------------------------------------TTAGTTCCTTGAAGAATCAGGTTTTCCCTATTTTTTTTTAATCTCCTGTGTCAACTGGCGTCTTGGACATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:30099-30713;size=1\n--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCCAACTAGTCCAGATGGCTGCAATTCCCCATACACCCACTACTCCTGTTTGGAAGCCTCTCCATCACTTCCT---------------------------------------------------------------------------------------------------------------------------TGGTAGGGGCTGAAGGCTCTGGTCTGGGGTGGGCTTGATCTCTTGTACAGAAAACAGAGCTGAACCACATG\nContig0:30099-30713;size=1\n--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCCAACTAGTCCAGATGGCTGCATTTCCCCATACACCCACTACTCCTGTTTGGAAGCCTCTCCATCACTTCCT---------------------------------------------------------------------------------------------------------------------------------------------------TGGGGTGGGCTTGACCTCTTGTACAGAAAACAGAGCTGA--------\n//\n//\nContig0:32872-33505;size=1\nAATTCCTCTCCCTCCCTAACGTTAATCCCCCTGATATATTTATATAGAGCAAGCA---------------------------------------------------------------------------------CTCGGATCATCCTAGTAGCCCGTCTCTGAACCTGTTCCAGTTTGAATTC----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:32872-33505;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGGATCCACTCAAGCAGTGCAAAGCAGCCGGAAATCCAGCCAATCAAGTTTGTATAGGCAAAATCCCTG-----------------------------------------------------------------------------------------------------------------------------------GGAAGCTGTGAAAGCAGCAGACAGCCACCTTGCCCCCAGCCTTAAAATCTGGACTGAGCTGAGTTTGGATGCAATT\n//\n//\nContig0:33785-34014;size=3\nAATTAGTGCTAGGAGTCATTTTATTTAGCACCGTCATTAATGATTTAGGAAGGTGGAGCGTTAGGAGCCCATTAAT----------------------------------------------------------------------------------ATAGGCCTTCCCAAGCTTTGGCAACCACCTATTCCCATAGGTGCTACAGGGGGCAGAGCTCCGTGTGCATG\nContig0:33785-34014;size=1\nAATTAGTGCTAGGAGTCATTTTCTTTAGCAGCGTCATTAATGATTTAGGAAGGTG------------------------------------------------------------------------------------------------------------CCTTCCCAAGCTTTGGCAACCACCTATTCCCATAGGTGCTACAGGGGGCAGAGCTCCGTGTGCATG\n//\n//\nContig0:36273-36404;size=1\nAATTGCCCCACTTGCCTCCTTGCCGGGAGGCCCTGAAACACCCCCAGGGAAAAATAAAACTCAGCGCCTGTGATATGCAGTCTTAGATATAGAGGCAGGCTGAGGAGGGGCTGGGTGTGAGGGGGAGCATG\n//\n//\nContig0:37976-38353;size=5\nAATTAAATGCTACAGTGTTGAGCTGTTTTTTGAGGAAGAGCCTGTTCCTTGAAAGCAACTGGCCTTTGCTTTTCTT----------------------------------------------------------------------------------------------------------------------------GAGGCTGTGGGATCGGCAGCTGGAGTTCCCAGGGCTGCTTGCTGAACTTGTTTATTGTCGATTTTCCCATG----------------------------------------------------------------------------------------------------------\nContig0:37976-38353;size=1\n---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTTAAGTATCCTCACACCTCTTGTCTACTGTCTGAAATGGGCCGTCTTGNTTATCACTACAAAAGTTTTTTTTTTCTCCTGCTGATAATAGCTCATCTTAATTAATT\n//\n//\nContig0:39644-39906;size=1\nAATTTGGACATCACGATTCTCAGGCGGCAGGTTCATGCCATGGAACACCGATTCTGGGCCTTGGAAAC---------------------------------------------------------------------------------------------------------------------------GTGCAAGAATACCAAGATGAGAGCAGCCCTCACAGTTGAGAAGCAAGTGACGATAGCCCTGTGGAGGCATG\n//\n//\nContig0:40396-40633;size=11\nCATGCACAGGCAGCTTGGACAGTAGTCAGGAGCTGTTCAACTATAGACTGAGCAAGTGCAGAATGGTGGTA------------------------------------------------------------------------------------------GCTTGTTGTGTGCTCCACAATATCTGTGAGACTAAGGGGGAGATGTTTATGGCGGAGTGGGAGATTGAGGCAAATT\nContig0:40396-40633;size=1\nCATGCACAGGCAGCTTGGACAGTAGTCAGGAGCTGTTCAACTATAGGCTGAGCAAGTGCAGAATGGTGGTA------------------------------------------------------------------------------------------GCTTGTTGTGTGCTCCACAATATCTGTGAGACTAAGGGGGAGATGTTTATGGCGGAGTGGGAGATTGAGGCAAATT\nContig0:40396-40633;size=1\nCATGCACAGGCAGCTTGGACAGTAGTCAGGAGCTGTTCAACTATAGACTGAGCAAGTGCAGAATGGTG------------------------------------------------------------------------------------------------------------------------GAGACTAAGGGGGAGATGTTTATGGCGGAGTGGGAGATTGAGGCAAATT\nContig0:40396-40633;size=1\nCATGCACAGGCAGCTTGGACAGTAGTCAGGAGCTGTTCAACTATAGACTGAGCAAGTGCAGAATG------------------------------------------------------------------------------------------------GCTTGTTGTGTGCTCCACAATATCTGTGAGACTAAGGGGGAGATGTTTATGGCGGAGTGGGAGATTGAGGCAAATT\n//\n//\nContig0:41620-42163;size=14\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTGGCTAACAATAGGGATGATTTCTGTTCAGCCAAAGGTAAACAGCCCAGCAGGAACGGCCATCTCTG------------------------------------------------------------------------------------------------CATCCCCATACACATTAATAGACTTTTCCAGTAGCTGTACTGTCTGCCAATGCATCCCAAGTCTTCAGGGCAAATT\nContig0:41620-42163;size=1\nCATGGCATGTGTGCTTTCTTTACAAGATCGCATTTTGCCTCTTATATTGAGGGCCTGCTGGTTTGGCGTGAGAGATCACACACGCAGGGCTGGTGGGCAACAGAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:41620-42163;size=1\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTGGCTAACAATAGGGATGATTTCTGTTCAGCCAAAGGTAAACAGCCCAGCAGGAACGGCCATCTCTG--------------------------------------------------------------------------------------------------TCCCCATACACATTAATAGACTTTTCCAGTAGCTGTACTGTCTGCCAATGCATCCCAAGTCTTCAGGGCAAATT\nContig0:41620-42163;size=1\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTGGCTAACAATAGGGATGATTTCTGTTCAGCCAAAGGTAAACAGCCCAGCAGGAACGGCCATCTC----------------------------------------------------------------------------------------------------TCCCCATACACATTAATAGACTTTTCCAGTAGCTGTACTGTCTGCCAATGCATCCCAAGTCTTCAGGGCAAATT\nContig0:41620-42163;size=1\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTGGCTAACAATAGGGATGATTTCTGTTCAGCCAAAGGTAAACAGCCCAGCAGGAACGGCCATCTCTG------------------------------------------------------------------------------------------------CATCCCCATACACATTAATGGACTTTTCCAGTAGCTGTACTGTCTGCCAATGCATCCCAAGTCTTCAGGGCAAATT\n//\n//\nContig0:42764-42845;size=1\nCATGGTCACCTGTGCTGATGAGCTCTGCATGGTCACCTGTGCTGATCAGCTTGCCACGCTGGTCAAACAGGAAATCAAATT\n//\n//\nContig0:44568-44784;size=1\nAATTTCCACAGGGCGGGAAAACCATTTTGTGCCCAGCACCCATTCAGGCAGCTGGTCACTGGGGTTGTCTGTGATG---------------------------------------------------------------------CCCTTTGATGACCATCCCCGCTGCTTAAGACCCAAAAGCAGAGCCTCGACCGCTGATGTTTCATGCACATG\n//\n//\nContig0:48224-48506;size=2\n-------------------------------------------------------------------CATGTCCAAATAATGTTGGACAACATAAGAAGGGCATTTTATATAAACAAGCAGGGGGGAGTGAGATCTCT--------------------------------------------------------------------ATATTTACCAGGGTTGCAAAACTCCCTAGCAGAGAGCCTGAGTGGATGTTTTTCCACAAATAGGAGATTCACAATT\nContig0:48224-48506;size=1\nTAATTTACTAGAACTCCAGGTGGTTCACTGAGCCTGCATCTCCTTCCCCCTGCTCATCCAATCCAGACATG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n//\n//\nContig0:49781-50026;size=1\nAATTTGCTTTTGCGGGCCAGTCCCATCTGAGGGCTACTCAGTTCCTCTTATGTGGAAGGGGAAGTAGTGGTCTTGA--------------------------------------------------------------------------------------------------GGACACCGTACTGATGTGGGAGAGAAAAGAGAGCAAGCTTAGATTGCATGCACTTTGCAGCAGCACCCATG\nContig0:49781-50026;size=1\nAATTTGCTTTTGCGGGCCAGTCCCATCTGAGGGCTACTCAGTT--------------------------------------------------------------------------------------------------------------CGGCTTCGTGTTGAAGCACTCGGACACCGTACTGATGTGGGAGAGAAAAGAGAGCAAGCTTAGATTGCATG---------------------\nContig0:49781-50026;size=1\nAATTTGCTTTTGCGGGCCAGTCCCATCTAAGGGCTACTCAGTTCCTCTTATGTGGAAGGGGAAGTA------------------------------------------------------------------------------------------------------------------CGTACTGATGTGGGAGAGAAAAGAGAGCAAGCTTAGATTGCATGCACTTTGCAGCAGCACCCATG\nContig0:49781-50026;size=1\nAATTTGCTTTTGCGGGCCAGTCCCATCTGAGGGCTACTCAGTTCCTCTTATGTGGAAGGGGAAGTAGTGGTCTTGA-----------------------------------------------------------------------------CGGCTTCGTGTTGAAGCACTCGGACACCGTACTGATGTGGGAGAGAAAAGAGAGCAAGCTTAGATTGCATG---------------------\n//\n//\nContig0:51214-51341;size=1\nCATGCCAGGTCCCACTGGCGGAGGCAGGGTCACGCCCAGGCCATCCCTGGAAGTACCAGAATGTCCAGTATTCTGGGGATGCATTAGGGGTCACTTTACTCAAAAGTGGCAAAGCCTCTAGCGAATT\n//\n//\nContig0:53474-53686;size=1\nCATGTTAACCAGGCCGGGTGCCCCAGGCCTCAGGCATCCTGCCTCTTAACCAGGCCCAGATGCTCTTCTTT--------------------------------------------------------------------------------------------------AACTCGCTGTGTGACTTTGATCAAGTCACTTTGCCTTTCCATG\nContig0:53474-53686;size=1\nCATGTTAACCAGGCCGGGTGCCCCAGGCCTCAGGCATCCTGC---------------------------------------------------------------------------------------------------GAACATTTGGTTTCTCTTCTGCGCCAATAACTCGCTGTGTGACTTTGATCAAGTCACTTTGCCTTTCCATG\n//\n//\nContig0:53920-54018;size=1\nCATGATATGCAGTGGAGGGGAGAGGAACCCCTGGGCCTGGCAGTTCAGAGCCCTGGCACCTCTGGGCTTGCTGCAGCAGTTACAAATGTAAAAAAATT\n//\n//\nContig0:56088-56333;size=10\n-CATGCTTCATTAGTTGTTTGTGTCCAGCTAGCAATGGATTTAAATCAAGAGATAGCCGAGATTGGAATCTG-------------------------------------------------------------------------------------------------TAATAATCAAGAGAGTTTGGGCTGTCTTTTGTTTGCAGTAATGTTTAGCAGCTGACACTGCAGCAGCTGAATAATT\nContig0:56088-56333;size=1\nTCATGCTTCATTAGTTGTTTGTGTCCAGCTAGCAATGGATTTAAATCA-------------------------------------------------------------------------------------------------------------------------TAATAATCAAGAGAGTTTGGGCTGTCTTTTGTTTGCAGTAATGTTTAGCAGCTGACACTGCAGCAGCTGAATAATT\nContig0:56088-56333;size=1\n-CATGCTTCATTAGTTGTTTGTGTCCAGCTAGCAATGGATTTAAATCAAGAGATAGCCGAGATTGGAATCTG----------------------------------------------------------------------------------------------------TAATCAAGAGAGTTTGGGCTGTCTTTTGTTTGCAGTAATGTTTAGCAGCTGACACTGCAGCAGCTGAATAATT\nContig0:56088-56333;size=1\n-CATGCTTCATTAGTTGTTTGTGTCCAGCTAGCAATGGATTTAAATCAAGAGATAGCCGAGATTGGAATCTG--------------------------------------------------------------------------------------------------AATAATCAAGAGAGTTTGGGCTGTCTTTTGTTTGCAGTAATGTTTAGCAGCTGACACTGCAGCAGCTGAATAATT\nContig0:56088-56333;size=1\n-CATGCTTCATTAGTTGTTTGTGTCCAGCTAGCAATGGACTTAAATCAAGAGATAGCCGAGATTGGAATCTG-------------------------------------------------------------------------------------------------TAATAATCAAGAGAGTTTGGGCTGTCTTTTGTTTGCAGTAATGTTTAGCAGCTGACACTGCAGCAGCTGAATAATT\nContig0:56088-56333;size=1\n-CATGCTTCATTAGTTGTTTGTGTCCAGCTAGCAATGGATTTAAATCAAGAGATAGCCGAGATTGGAATCTG--------------------------------------------------------------------------------------------------------------AGTTTGGGCTGTCTTTTGTTTGCAGTAATGTTTAGCAGCTGACACTGCAGCAGCTGAATAATT\n//\n//\nContig0:59313-59545;size=1\nCATGGAGGGTGGGGGCTTCTGGTTGTAACTGGGGATGATCCCTCTGACCAGTTCCATGCCACTGTGG-----------------------------------------------------------------------------------------GCTTCCCCTGGTGTCAGGGCAAGCCCCCGAGAGGAACAATGACAAAGCATCACATACCGAGGTGGAATATTTAATT\n//\n//\nContig0:62283-62553;size=3\n-AATTCAGAACTGCCAGTGAACCAACCCCCCATCCATTATTTGCACAACTCTACTTAAGACTTCAGGCCGCAGAGGT----------------------------------------------------------------------AGCAGTATAGGGCCTATGACACTCAGTGGTGCAGTTCTGATTCCATCCAGCAGAGGACAGTGCTGACCATG----------------------------------------------------\nContig0:62283-62553;size=1\nAAATTCAGAACTGCCAGTGAACCAACCCCCCATCCATTATTTGCACAA---------------------------------------------------------------------------------------------------AGCAGTATAGGGCCTATGACACTCAGTGGTGCAGTTCTGATTCCATCCAGCAGAGGACAGTGCTGACCATG----------------------------------------------------\nContig0:62283-62553;size=1\nAAATTCAGAACTGCCAGTGAACCAACCCCCCATCCATTATTTGCACAAC--------------------------------------------------------------------------------------------------AGCAGTATAGGGCCTATGACACTCAGTGGTGCAGTTCTGATTCCATCCAGCAGAGGACAGTGCTGACCATG----------------------------------------------------\nContig0:62283-62553;size=1\n-AATTCAGAACTGCCAGTGAACCAACCCCCCATCCATTATTTGCACAACTCTACTTAAGACTTCAGGCCGCAGAGGT--------------------------------------------------------------------------------------------------------------------------GAGGACAGTGCTGACCATGCACATACTAACTAGTATGTTATAATGCCCTTATTTTGGGCTTTTCCTCCATG\n//\n//\nContig0:63207-63392;size=1\nCATGTTCAATGGTTTCACTTCAACGCAAACAGCCGTGGCTTCGATTGTGAACAGAAAATGTTGTGGCACAG--------------------------------------TGCCTCGCTGGTCTCTGAGGGCCCTGAAGAGCTTGCACGCCAAAGGACCAGATGGCTGGTGGGTTAAATGGAAATT\nContig0:63207-63392;size=1\nCATGTTCAATGGTTTCACTTCAACGCAAACAGCCGCGGCTTCGATTGTGAACAGAAAATGTTGTGGCACAG--------------------------------------TGCCTCGCTGGTCTCTGAGGGCCCTGAAGAGCTTGCACGCCAAAGGACCAGATGGCTGGTGGGTTAAATGGAAATT\n//\n//\nContig0:64734-65000;size=12\nAATTTGCTGCTAGGCCCAGACTAAAAGCAGGTAGCCACTGCGAATGCACAAGAGAGCCCAATGTAGCCCTGGAGAG-----------------------------------------------------------------------------------------------------------------------CTGTTCTCTGGCTTTGAACGACTGTGTGCTTTGGTGAAATACCGCTCAGAGCCAATGGGGTGGCATTCATG\nContig0:64734-65000;size=1\nAATTTGCTGCTAGGCCCAGACTAAAAGCAGGTAGCCACTGCGAATGCACAAGAGAGCCCAATGTAGCCCTGG---------------------------------------------------------------------------------------------------------------------------CTGTTCTCTGGCTTTGAACGACTGTGTGCTTTGGTGAAATACCGCTCAGAGCCAATGGGGTGGCATTCATG\nContig0:64734-65000;size=1\nAATTTGCTGCTAGGCCCAGACTAAAAGCAGGTAGCCACTGCGAATGCACAAGAGAGCCCAATGTAGCCCTGGAGAG-----------------------------------------------------------------------------------------------------------------------CTGTTCTCTGGCTTTGAACGACTGTGTGCTTTGGTGAAATGCCGCTCAGAGCCAATGGGGTGGCATTCATG\nContig0:64734-65000;size=1\nAATTTGCTGCTAGGCCCAGACTAAAAGCAGGGAGCCACTGCGAATGCACAAGAGAGCCCAATGT-----------------------------------------------------------------------------------------------------------------------------------CTGTTCTCTGGCTTTGAACGACTGTGTGCTTTGGTGAAATACCGCTCAGAGCCAATGGGGTGGCATTCATG\n//\n//\nContig0:65302-65475;size=1\nAATTAAAAACGAACCATAAACAACAACTGAGAAAGGATTCCAGATCGTCTTTTAAAAAAGGAAGCGGGTTCTCCGG--------------------------GAAAAATGCACAGTTGATAGGAAAGAGTGTTAGCTCCATCGTGAGCCCCTGACCCACAAGGGCTGTGCATG\n//\n//\nContig0:67691-68021;size=1\nCATGATCTTATGGTCACTCTACCATTCTCCCATGCACCTGGAAGAGAAAGACAGAAGCTGTGTTAACCGAC-------------------------------------------------------------------------------------------------TTAGCACCTTTCGCTCAAGGGCTTCTAATGCTCCCCTCACCTCCCCTCAATAATGATACTTAGCACTTGATGAATT--------------------------------------------------------------------------------------\nContig0:67691-68021;size=1\n------------------------------CATGCACCTGGAAGAGAAAGACAGAAGCTGTGTTAACCGACTGAACCCATGAGATGCTAACGAGGTCTCTC-------------------------------------------------------------------TTAGCACCTTTCGCTCAAGGGCTTCTAATGCTCCCCTCACCTCCCCTCAATAATGATACTTAGCACTTGATGAATT--------------------------------------------------------------------------------------\nContig0:67691-68021;size=1\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTAAATAAGCCTTACCTGCTGTGAAATGTCAATACTATTTTGAAGATGGGGAAACCGAGGCATG\n//\n//\nContig0:68467-69007;size=9\n----------------------------------------------------------------------------------------------------------------------------------AATTGATGCAACTTCTGTGTGTAGAGTAGGCTTCCGTGTTTTCAGATGCACATATTTGGGTCTCTGACCTGGTCCA------------------------------------------------------------------------------------------------TCCCCGGTCCCGCAGAGTCGGTGGCTTGCCCGTGGGAGGTCCCTGGTCCTGTGGATTCGGTGGCTTGCATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:68467-69007;size=1\nAATTTTACCTGAAGTTCATTTCAGAAGGGGAAGCTAAACTGGGGCAAGTGGGTTTAAACTCATGTGACGGTTCCCACACACAAAGTTGCACCAGGGTACCCATG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:68467-69007;size=1\n----------------------------------------------------------------------------------------------------------------------------------AATTGATGCAACTTCTGTGTGTAGAGTAGGCTTCCGTGTTTTCAGATGCACATATTTGGGTCTCTGACCTGGTCCA-------------------------------------------------------------------------------------------------CCCCGGTCCCGCAGAGTCGGTGGCTTGCCCGTGGGAGGTCCCTGGTCCTGTGGATTCGGTGGCTTGCATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:68467-69007;size=1\n----------------------------------------------------------------------------------------------------------------------------------AATTGATGCAACTTCTGTGTGTAGAGTAGGCTTCCGTGTTTTCAGATGCACATATTTGGGTCTCTGACCTG-----------------------------------------------------------------------------------------------------TCCCCGGTCCCGCAGAGTCGGTGGCTTGCCCGTGGGAGGTCCCTGGTCCTGTGGATTCGGTGGCTTGCATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:68467-69007;size=1\n----------------------------------------------------------------------------------------------------------------------------------AATTGATGCAACTTCTGTGTGTAGAGTAGGCTTCCGTGTTTTCAGATGCACATATTTGGGTCTCTGACCTGGTCCA------------------------------------------------------------------------------------------------TCCCCGGTCCCGCAGAGTCGGTGGCTTGCCCGTGGGAGGTTCCTGGTCCTGTGGATTCGGTGGCTTGCATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:68467-69007;size=1\n--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTGCTGCCGAATCCGCAGTACCAGCCGACCTCCCACAGGCAAGCCGTCGAAGGCTGCCTGTCTGCCGCCCTCGCAGAGACCGGCAGGGCTCCCCCTGAAGCTTGCCCTGCCCCAGGGACATG\n//\n//\nContig0:69863-70116;size=19\nAATTTGCTGGGTCCCTGGCCGGGGCAGGCCTGTAACTCAGCAGTCGGTTGTCTCATAGAATCTCAGGGTTGGAAGG----------------------------------------------------------------------------------------------------------TGGGTTTAGCAGGCTAATGCTCAGATCACTGAGCTATCCCTCCCCCCCACTTCACTACCCACTAGGCCATG\nContig0:69863-70116;size=1\nAATTTGCTGGGTCCCTGGCCGGGGCAGGCCTGTAACTCAGCAGTTGGTTGTCTCATAGAATCTCAGGGTTGGAAGG------------------------------------------------------------------------------------------------------------GGTTTAGCAGGCTAATGCTCAGATCACTGAGCTATCCCTCCCCCCCACTTCACTACCCACTAGGCCATG\nContig0:69863-70116;size=1\nAATTTGCTGGGTCCCTGGCCGGGGCAGGCCTGTAACTCAGCAGCCGGTTGTCTCATAGAATCTCA-----------------------------------------------------------------------------------------------------------------------GGTTTAGCAGGCTAATGCTCAGATCACTGAGCTATCCCTCCCCCCCACTTCACTACCCACTAGGCCATG\n//\n//\nContig0:71142-71672;size=1\nAATTAGGCTTGAATAAAAACTGGGAGTGGATGGGCCATTACACAAAGTAAAACTATTTCCCCATGTTTATTTTCCC-----------------------------------------------------------------------------CCTGCTGGTAATAGCTCACCTTAACTGATCACTCTCGTTAGAGTGTGGATGGTAACACCCATTGTTTCATG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:71142-71672;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGTTAGTCTCTAAGGTGCCACAAGGACTCCTGTTCTTTTTGAGGATACAGACTAGCATGGCTGCTACTCTGA------------------------------------------------------GTGCTGAGAGTTCGATGACTCTGGTAGCTGGGGGGATGTTCACTGGGACCCCCCCGCTCCCAGGAGACATG\n//\n//\nContig0:73757-73975;size=1\nCATGGTCTGTTTGCTACCTTGGGGCTGGCCTCGCTGGATGGT---------------------------------------------------------------------------------------------------------GGTGCTCCAGAGACTGGTGCTGAGGATGCAGCAGGGGGTGCTCTCCCTTCTGCATCAGAACGTATCCCATG\n//\n//\nContig0:74377-74795;size=1\nAATTGGTATAAGAATCCAAGGATGGTAACAAGAGCCGAAAATGCCACCCAAGGACTTCGATTCACTGACCTTGTGG-------------GCTGGAAAATGGCAGACACACTTGTTCTTTCTTGTTAGCCTCTGGAGACGCTGATGGGGCCTGAGGACATG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:74377-74795;size=1\n----------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCAGGTCAAGGAGTTAGCACCGGACTGGCAGCCCATGACTGTCCCCTGATACCATCACAGGGCTGG----------------------------------------------------------------------------------------------------------------------TGGGCACCCACACAGATCTGCTGATGCCCGTCACTCCTCACCGACGGCCACCTCGAGGGCAGATTAACATG\n//\n//\nContig0:75032-75494;size=2\nCATGCCCCCACAGGGGGTGTGTGCCCCCGCCGCCCTGGTTGAGAATCTCTGTTTAGAGGGTCAGAGCCCAG---------------------------------------------------------------------------------AAAGCCCCTTGCTCCTGCTACCCAATCGCCTGCCACCCCTGCCACATTCAGAGTCAGATCCAACTTCCCCCGAATT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:75032-75494;size=1\nCATGCCCCCACAGGGGGTGTGTGCCCCCGCCGCCCTGGTTGAGAATCTCTGTTTAGAGGGTCAGAGCCCAG--------------------------------------------------------------------------------------------------------------------CCCCTGCCACATTCAGAATCAGATCCAACTTCCCCCGAATT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:75032-75494;size=1\nCATGCCCCCACAGGGGGTGTGTGCCCCCGCCGCCCTGGTTGAGAATCTCTGTTTAGAGGGTCAGAGCCCAG-------------------------------------------------------------------------------------CCCCTTGCTCCTGCTACCCAACCGCCTGCCCCCCCTGCCACATTCAGAGTCAGATCCAACTTCCCCCGAATT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:75032-75494;size=1\nCATGCCCCCACAGGGGGTGTGTGCCCCCGCCGCCCTGGTTGAGAATCTCTGTTTAGAGGGTCAGAGCCCAG--------------------------------------------------------------------------------------------------------------------CCCCTGCCCCATTCAGAGTCAGATCCAACTTCCCCCGAATT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:75032-75494;size=1\nCATGCCCCCACAGGGGGTGTGTGCCCCCGCCGCCCTGGTTGAGAATCTCTGTTTAGAGGGTCAGAGCCCAG-------------------------------------------------------------------------------------------------------------CCTGCCTCCCCTGCCACATTCAGAGTCAGATCCAACTTCCCCCGAATT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:75032-75494;size=1\nCATGCCCCCACAGGGGGTGTGTGCCCCCGCCGCCCTGGTTGAGAATCTCTGTTTAGAGGGTCAGAGCCCAG--------------------------------------------------------------------------------------CCCTTGCTCCTGCTACACAATCGCCTGCCACCCCTGCCACATTCAGAGTCAGATCCAACTTCCCCCGAATT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:75032-75494;size=1\nCATGCCCCCACAGGGGGTGTGTGCCCCCGCCGCCCTGGTTGAGAATCTCTGTTTAGAGGGTCAGAGCCCAG---------------------------------------------------------------------------------AAAGCCCCTTGCTCCTGCTCCCCAATCGCCTGCCACCCCTGCCACATTCAGAGTCAGATCCAACTTCCCCCGAATT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:75032-75494;size=1\n--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGGAGATTGAGGGTTTTGAATATACAACTTCAGACTGTTAAGTGGGGTTATTGTAGCTTCCCTGCTCAATAGCTTCATACAGAATT\n//\n//\nContig0:76480-77015;size=6\n----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGACACACAGCTGTGTCTTTCTGATTCCATCCAATAGGGGGCAGCAGAGCATATAAACATTAACCAGTT------------------------------------------------------------------------------------ATATGCAGTTTTTCTTGTTCCCCTCCAGAGGGGTCAGTAATAAAACAATGGATATCATGGGATTAGAGCTGAAATT\nContig0:76480-77015;size=4\nCATGTCTTATCTGGATCAGTCAACAGCATCTCTGCCTTTATCGGCCATCTATCTACAATATGTATCTAAGC------------------------------------------------------------------------------------CTACACCTTCTTTTAACAGATCCCTGGGGACCCAGGACATCACTAGGCACTTTATGGTCACAGTAGGGATAGAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:76480-77015;size=2\nCATGTCTTATCTGGATCAGTCAACAGCATCTCTGCCTTTATC-GCCATCTATCTACAATATGTATCTAAGCA-----------------------------------------------------------------------------------CTACACCTTCTTTTAACAGATCCCTGGGGACCCAGGACATCACTAGGCACTTTATGGTCACAGTAGGGATAGAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:76480-77015;size=1\nCATGTCTTATCTGGATCAGTCAACAGCATCTCTGCCTTTATC-GCCATCTATCTACAATATGTATCTAAGCA-----------------------------------------------------------------------------------CTACACCTTCTTTTAACAGATCCCTGGGGACCCGGGACATCACTAGGCACTTTATGGTCACAGTAGGGATAGAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:76480-77015;size=1\n----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGAGAATCTCTATAAGCACTATAGGGCTCATGACACACAGCTGTGTCTTTCTGATTCCATCCAATAGGG------------------------------------------------------------------------------------------------------------------ATATGCAGTTTTTCTTGTTCCCCTCCAGAGGGGTCAGTAATAAAACAATGGATATCATGGGATTAGAGCTGAAATT\nContig0:76480-77015;size=1\n----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGACACACAGCTGTGTCTTTCTGATTCCATCCAATAGGGGGCAGCAGAGCATATAAACATTA-------------------------------------------------------------------------------------------------AGTTTTTCTTGTTCCCCTCCAGAGGGGTCAGTAATAAAACAATGGATATCATGGGATTAGAGCTGAAATT\n//\n//\nContig0:79124-79379;size=1\nCATGGACATAAGCCTTACGCCTCTCCTGGAAGTGGAGTTATGATGTCCGTGCAGTGGGGCGCTTACCTCG-------------------------------------------------------------------------------------------------------------GGTGCTGGGCAAATGGTCGAAGACTTGTTCCAACCTCTCCACTCTTGAGACCCAAACTTTGGGTCTGGTGGGAATT\nContig0:79124-79379;size=1\nCATGGACATAAGCCTTACGCCTCTCCTGGAAGTGGAGTTATGATGTCCGTGCAGTGGGGCGCGTACCTCGA--------------------------------------------------------------------------------------------------------------TGCTGGGCAAATGGTCGAAGACTGGTTCCAACCTCTCCACGCTTGAGACCCAAACTTTGGGTCTGGTGGGAATT\nContig0:79124-79379;size=1\nCATGGACATAAGCCTTACGCCTCTCCTGGAAGTGGAGTTATGATGTCCGTGCAGTGGGGCGCTTACCTCGA---------------------------------------------------------------------------------------------------------------------------------------------CCTCTCCACTCTTGAGACCCAAACTTTGGGTCTGGTGGGAATT\n//\n//\nContig0:79959-80151;size=1\nAATTTTTAAGCAACAGGATTTACTACCCGCTGTGGGAGGTGACAGGCTGATGGCTGTCTGGGAGACCCTCTCTGTC---------------------------------------------GTAGCCTGAACAGGCAGGATCCCACTAGTTGGCCATCTCCTCCATCCTCCCGCTAGCCACAGGTAGACATG\nContig0:79959-80151;size=1\nAATTTTTAAGCAACAGGATTTACTACCCGCTGTGGGAGGTGACAGGCTGATGGCTGTCTGGGAGACCCTCTCTGTC---------------------------------------------GTAGGCTGAACAGGCAGGATCCCACTAGTTGGCCATCTCCTCCATCCTCCCGCTAGCCACAGGTAGACATG\n//\n//\nContig0:81339-81852;size=11\nCATGCGTAGGCTTGGAAACCCCTAGGGCACAAGCCTGGTGCCTGCAGAGCACAGGGTCGATCTCCCCGGAC--------------------------------------------------------------------------------------------------TTTATTACACGTTCTAGGATGAAGCCACTGTGGCAGGGAAGCTCTCCGGAAATGTAGAAGTTAAAACGTTATAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:81339-81852;size=6\nCATGCGTAGGCTTGGAAACCCCTAGGGCACAAGCCTGGTGCCTGCAGAGCACAGGGTCGATCTCCCCGGAC--------------------------------------------------------------------------------------------------TTTATTACACGTTCTAGGATGAAGCCACTGTGGCAGGGAAGCTCTCCGGAAATGTAGAAGTTAAAACGCTATAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:81339-81852;size=4\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGAAAAGTCAGGAAAAGAGCAGAGGCCGGACGTGGCTTCTCTCCCCTTGGCTTCACTGGGTTTTGATTGGCC-----------------------------------------------------------------------------------------------------------------------------GCAGGACCTCGGTCACTCTGGGCCTCATTCTGCTCTCCGGGATGCTGGTTTAGTGCTGCTGGGACTCCATG\nContig0:81339-81852;size=1\nCATGCGTGGGCTTGGAAACCCCTAGGGCACAAGCCTGGTGCCTGCAGAGCACAGGGTCGATCTCCCCGG------------------------------------------------------------------------------------------------------TATTACACGTTCTAGGATGAAGCCACTGTGGCAGGGAAGCTCTCCGGAAATGTAGAAGTTAAAACGTTATAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:81339-81852;size=1\nCATGCGTAGGCTTGGAAACCCCTAGGGCACAAGCCTGGTGCCTGCAGAGCACAGGGTCGAT--------------------------------------------------------------------------------------------------------------------------TAGGATGAAGCCACTGTGGCAGGGAAGCTCTCCGGAAATGTAGAAGTTAAAACGTTATAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:81339-81852;size=1\n-ATGCGTAGGCTTGGAAACCCCTAGGGCACAAGCCTGGTGCCTGCAGAGCACAGGGTCGATCTCCCCGG----------------------------------------------------------------------------------------------------TTTATTACACGTTCTAGGATGAAGCCACTGTGGCAGGGAAGCTCTCCGGAAATGTAGAAGTTAAAACGTTATAATT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:81339-81852;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGAAAAGTCAGGAAAAGAGCAGAGGCCGGACGTGGCTTCTCTCCCCTTGGCTTCACTGGGTTTT-------------------------------------------------------------------------------------------------------------------------------------GCAGGACCTCGGTCACTCTGGGCCTCATTCTGCTCTCCGGGATGCTGGTTTAGTGCTGCTGGGACTCCATG\n//\n//\nContig0:82206-82699;size=4\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCCATTCTTCCTTTCCCATACCTCCCGCCCTGCTCCTTTCCTCTCTTGATTTCTTCTTGAGGGAGGCAGAGGA----------------------------------------------------------------------------------------GGCACCAGATTTTCTCACTGTTCAGGTCAGGGTTTGACTTCAGCCCCATCTCTAATACAAGCCATG\nContig0:82206-82699;size=1\nCATGCAATGCAACAAGGTCTGGAGCTCCTTAGCGAGCCTTCGAGCCACCCAGTCCCTGAAATACACCCCCTGGTCCCTTTCAGGCTGGATCCAGAATGGAAAGTGTAACACCAATT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:82206-82699;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCCATTCTTCCTATCCCATACCTCCCGCCCTGCTCCTTTCCTCTCTTGATTTCTTCTTGAGGGAGGCAGAGGA----------------------------------------------------------------------------------------GGCACCAGATTTTCTCACTGTTCAGGTCAGGGTTTGACTTCAGCCCCATCTCTAATACAAGCCATG\nContig0:82206-82699;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCCATTCTTCCTTTCCCATACCTCCCGCCCTGCTCCTTTCCTCTCTTGATTTCTTCTTGAGGGAGGCAGAGG-----------------------------------------------------------------------------------------GGCACCAGATTTTCTCACTGTTCAGGTCAGGGTTTGACTTCAGCCCCATCTCTAATACAAGCCATG\nContig0:82206-82699;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCCATTCTTCCTTTCCCATACCTCCCGCCCCGCTCCTTTCCTCGCTTGATTTCTTCTTGAGGGAGGCAG-------------------------------------------------------------------------------------------TGGCACCAGATTTTCTCACTGTTCAGGTCAGGGTTTGACTTCAGCCCCATCTCTAATACAAGCCATG\nContig0:82206-82699;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCCATTCTTCCTTTCCCATACCTCCCGCCCTGCTCCTTTCCTCTCTTGATATCTTCTTGAG---------------------------------------------------------------------------------------------------TGGCTCCAGATTTTCTCACTGTTCAGGTCAGGGTTTGACTTCAGCCCCATCTCTAATACAAGCCATG\n//\n//\nContig0:83146-83367;size=1\nAATTCCATCCTGATCCTTCGAACCCAGCACGTTCCTACAACAGGTTTCAGTTTTGACGAACTGGTATTTGTTGCTG-----------------------------------------------------------------------------CCTACCAAGCACCCGAGGGTGTCAGTGGCTGCCCTGAAATGACGGGCACTGCTTGTTGCTGCCCCATG\nContig0:83146-83367;size=1\nAATTCCATCCTGATCCTTCGAACCCAGCACGTTCCTACAACAGGTTTCAGTTTTGACGAACTGGTATTTGTTGCTG--------------------------------------------------------------------------CACCCTACCAAGCACCCGAGGGTGTCAGTGGCTGCCCTGAAATGACGGGCACTGCTTGTTGCTGCCCCATG\n//\n//\nContig0:86329-86448;size=1\nAATTTGGGGGCACTGCTTTTTGGTGCCCCCAAATCTCGGTGCCACCGCCTAGTTCACCTAGTGGTTACACCGGCCCTGAGCGTCTGGGGAGTCTTTCCTCCCGAAGGCCCAGCAGCATG\n//\n//\nContig0:95645-95887;size=10\nAATTGAGGCAACTTTAACTGATATGTCCTGAAGTCACGAGGCCTTAATGGGCATCCTACGTGTAAAGGGCATCCTT-----------------------------------------------------------------------------------------------AAGTGATAAGAAGACTTTTTTATATACTGTGCAAGACTAACCTGTGCAACTCATTGCTACAGGATATCATG\nContig0:95645-95887;size=4\nAATTGAGGCAACTTTAACTGATATGTCCTGAAGTCACGAGGCCTTAATGGGCATCCTAGGTGTAAAGGGCATCCTT-----------------------------------------------------------------------------------------------AAGTGATAAGAAGACTTTTTTATATACTGTGCAAGACTAACCTGTGCAACTCATTGCTACAGGATATCATG\n//\n//\nContig0:98705-98903;size=1\nCATGCAGTGTAGCCGCTGTTTGTTGGCAGTGCGACAAAAAACTTCCACCATCAATGAGTGGCGTTTACATT---------------------------------------------------TCGCGGTAAAACTTTTGTCTTTCGGGGTGGGGTGGGGATGTTTGGGTTAACACCACAAAAGTTTTGTCATTCAATT\n//\n//\nContig0:99204-99415;size=1\nCATGTGAGGGCAGGAGAAATGCCAAATCCAATCACTGCCTAGGGACAACTCTGACCTTTACCCCGAACAGC--------------------------------------------------------------------------------------------CCAGGTTTGATCCCTCACTGGTGAGGAGTCCTGCAGAGGTAGCACATG\nContig0:99204-99415;size=1\nCATGTGAGGGCAGGAGAAATGCCAAATCCAATCACTGCCTAGGGACAACTCTGACCTTTACCCCGAACAGC----------------------------------------------------------------------------------------------------------TCACTGGTGAGGAGTCCTGCAGAGGTAGCACATG\n//\n//\nContig0:101553-101788;size=4\n----CATGTAACTTCATAATAGACAAGTCAGGCAACAGCCTGGATTTCACGAAGGGCTGGAGGATTTCAGGC---------------------------------------------------------------------------------------CGCCCTAGGGAGCAATCCTGTATTGCTCCCAGGGAGACTGACTGGGTGATCCAACAGTGCTTTTCTGTCTCTAATT\nContig0:101553-101788;size=3\nCATGCATGTAACTTCATAATAGACAAGTCAGGCAACAGCCTGGATTTCACGAAGGGCTGGAGGATTTCAGG----------------------------------------------------------------------------------------CGCCCTAGGGAGCAATCCTGTATTGCTCCCAGGGAGACTGACTGGGTGATCCAACAGTGCTTTTCTGTCTCTAATT\nContig0:101553-101788;size=1\nCATGCATGTAACTTCATAATAGACAAGTCAGGCAACAGCCTGGATTTCACGAAGGGCTGGAGGATTTCAG-----------------------------------------------------------------------------------------CGCCCTAGGGAGCAATCCTGTATTGCTCCCAGGGAGACTGACTGGGTGATCCAACAGTGCTTTTCTGTCTCTAATT\n//\n//\nContig0:102134-102336;size=1\nAATTTGGCCTGTTCTCCTTGCACTAGAAGTTGATATTTCCCTGCACATACGCTGCTCTTGCACGGACTCTTCTGTT-------------------------------------------------------CTCTCCTCCTTGCTCTAACACTCAAGTAAAAAATCGCCATCCCACATTATACTCCCCAATCATCCCGCATG\n//\n//\nContig0:103469-103709;size=16\nCATGGTCTACTAATCACAGAGATGCCACTGAGCTGGCCCTAGAACCAGGTGCAGCGTACGCAACTCTGTGC---------------------------------------------------------------------------------------------CCCCTGGTTTGTCCCCAGGCAGGAGGCGAGTAGAGGCCTCCAGGTATCAGTTTCTATCTGTCGTGTTTAACAAATT\nContig0:103469-103709;size=1\nCATGGTCTACTAATCACAGAGATGCCACTGAGCTGGCCCTAGAACCAGGTGCAGCGTACGCAACTCTGTGC---------------------------------------------------------------------------------------------CCCCTGGTTTGTCCCCAGGCAGGAGGCGAGTAGAGGCCTCCTGGTATCAGTTTCTATCTGTCGTGTTTAACAAATT\n//\n//\nContig0:104258-104379;size=1\nAATTTTGAAAGATGTTACTATTTATTTAGTTTTTGGCTCGTTGTCGTTCCTGATTGACTTTGTTTTGCTGGAGCAGTTGGGAGGGATTATTTCACTCTTCTTGGCAGCTTCCACATACATG\n//\n//\nContig0:104739-105191;size=1\nAATTACCAAGGGTTCCAGGTGGGCTTGTACCACTCGTGCTTCAGCCCGTGCTGTTATAGCTTCGCTGCTTCTGTAC----------------------------------------------------------------------------------------------------------CTTAATGTGCTAGCGAAAGGGTTGATGAATT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:104739-105191;size=1\nAATTACCAAGGGTTCCAGGTGGGCTTGTACCACTCGTGCTTCAGCCCGTGCTGTTATAGCTTCGCTGCTTCTGTAC----------------------------------------------------------------------CGTCAACAAACCTTTAGTCTCAATCAGCGACCTCCACTTAATGTGCTAGCGAAAGGGTTGATGAATTCATG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:104739-105191;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTCATGTAACGATTAAAAAACGACAACAACAATAAACAGCACAAAAGTATATTACTTTAGTGTCCACCACAGCA------------------------------------------------------------------------------------------------AAAAAAGATACCCGGACGCCAGCAGTTTTCACCCTGTAACGGGAGAAAAGCCACTCTATGAAGTCCACATG\n//\n//\nContig0:106701-106806;size=1\nCATGGCCTTGGTGAGAGAGCTCTGTTCCAGAATCTGAGAGTCTCTTGTCACCACACACTTCTTCAGCTCCAGTATTTAGAGCCCCGGGCTGGGATTCACAAAATT\n//\n//\nContig0:107637-108115;size=13\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGACCTCACAGTCCCTTTCAGCCTGACACTTCTACGATTCTATGATGCACTGGAAGAGAACGGTCAGAA------------------------------------------------------------------------------------------------AGGGGTAGAGCTTTTCTAGTTTGGCTGTGCCAGCAAACTTTCCCAGTTAGAGCTGCAAGAGGCAGACTGAAAAATT\nContig0:107637-108115;size=1\nCATGTCCACTGAGGGTAGGACAAGAAGTAACGGGCTTAATC----------------------------------------------------------------------------------GGAGGTTGTGGAATCCCCGTCACAGGAGGTTTTGAAGAACAAGTCAGACAAACGCCTGTCAGGGATGGTCTAAATT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:107637-108115;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGACCTCACAGTCCCTTTCAGCCTGACACTTCTACGATTCTATGATGCACTGGAAGAGAACGGTCAGAA--------------------------------------------------------------------------------------------------------AGCTTTTCTAGTTTGGCTGTGCCAGCAAACTTTCCCAGTTAGAGCTGCAAGAGTCAGACTGAAAAATT\n//\n//\nContig0:110181-110429;size=3\nAATTGCATTTGTAAACCATTTCACGACTTTGATGCACGATTTGCATGCATGATTTATCCCGGTCATATGAGATGAT-----------------------------------------------------------------------------------------------------ACAACAAAAAACAGCACCCCGAGCTCAGTGCCCCCCAACCTGGCAGCCCAGGTGGTTGCCTGGGTCACATG\n//\n//\nContig0:111104-111195;size=1\nCATGCAAGCAGACCAGAGCTGCCCCAAAGCACTGACGGTGGCCCCTTGACCTGTTTGAAATGCTACATTTGCCCCAAGCTGCAAGTGAATT\n//\n//\nContig0:111481-111551;size=1\nAATTAAAAATAGGCTGCGGCAAGAATGGATCCTCTGAACAGCGAAGGGAGGTACGTGGCAGGCTCCATGC\n//\n//\nContig0:112880-113019;size=1\nCATGCTGCTGCTTCCAGGAGCTGCCTGAGGTAAGCGCTGCCCAGAGCCTGCAACTCGGCTATCTCAAAAACTGACTGCGACAGTGAAAAATGAACAGTTCAAGGCTGAGAGCATCTGGTGATAAACTAACAAAAAAATT\n//\n//\nContig0:113494-113752;size=1\nAATTTCCATACCCAGATGTGTCCACGCCTGGACTCTACAGCATGCCGCAATCACATCTTGATAGCTGTGTCTACAC---------------------------------------------------------------------------------------------------------------ACAGGATTGTACTTGGGTGCTCAGCAACCCCAACCAGGGGCGGCTCTAGACATTTTGCCACCCCAAGCATG\n//\n//\nContig0:120824-121022;size=1\nAATTAGAGCAACCTCGAGGCTGCTCTACCTTACAGTAGGAGCAGGATAGACTCCCGGATAGTTCCAGAATATGATC---------------------------------------------------CACCAAAAGCTGCTGGGCTGGGACAGAGAACTGAGGTCTGTTTCTCTCAAGAAAGAACCAGGTTTCACATG\nContig0:120824-121022;size=1\nAATTAGAGCAACCTCGAGGCTGCTCTACCTTACAGTAGGAGCAGGATAGACTCCCGGATAGTTTCAGAATATGATC---------------------------------------------------CACCAAAAGCTGCTGGGCTGGGACAGAGAACTGAGGTCTGTTTCTCTCAAGAAAGAACCAGGTTTCACATG\n//\n//\nContig0:122245-122688;size=1\nAATTCCTCTAAAGCCGGTGTTCTGGGGGACGGACTGACTCAGGGGTACGGGTAAGGGAATAGATCACAGATCACTT-------GGTCCAGGATTGTACTTCCCAAATATGACGACTATATTGTACCCTTGCCTGTGCAAAAGGAGACATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:122245-122688;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGCATCGACTTCAAGAAACGGTCCCTCCGGGGCAAGGCAGAGGCAGGCT------------------------------------------------------------------AGTAGGATGGCCAAAGGGTTATACCCTTCAGATCTCCTTTACAGGTGCCATCTTCCGAGTCCGACTGCATG\nContig0:122245-122688;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGCATCGACTTCAAGAAACGGTCCCTCCGGGGCAAGGCAGAGGCAGGCTTGTAGGGTATGGGATGGGGGAAA-------------------------------------------AGTAGGATGGCCAAAGGGTTATACCCTTCAGATCTCCTTTACAGGTGCCATCTTCCGAGTCCGACTGCATG\n//\n//\nContig0:124322-124627;size=1\nAATTACCCACCTTTCAGAAGGCTGATTAATGACATGGCTCTGCAGCATGCACAGTAGCTCAACCTCCTGCATGAAA-----------------------------------------------------------------------CAAGCTATTCTTTCCCAGATCTGGCGTAGGAAGTGCGGCGCTTCCAGCGGCCAGGGAAGATCCCTGCCATG---------------------------------------------------------------------------------------\nContig0:124322-124627;size=1\n---------------------------------------------CATGCACAGTAGCTCAACCTCCTGCATGAAACACACTCATCTCTCTCTCTCCTGAGATTGACAGGTCACGG-----------------------------------------------------------------------------------------------------------------GGCTTTGTAGCAAACTCAGCAGAAGCGGCTTCTCACAGAGTCAATCCTGGATACAGAAGATTTGCCATAGCAAATT\n//\n//\nContig0:127068-127278;size=1\nCATGATCCTGGGAGCAGCCAGTGAAGAAAGCGAAATGCCATTATAGTGGG-----------------------------------------------------------------------------------------AGGTGGGGCGAGCTGAGGACATCTTCCCAGACCATCTCTGAGCAGCACAGAGGGTGGATCCCAGCTCCATG\n//\n//\nContig0:128188-128337;size=1\nCATGGCTTCCTCTTTTATAAGGCTCAGGTGCCTTCCCTTAAGCCCAGTCGGGCAGCAGCTAATCAGACACA--TGCCCTGGACGCCCCCCCTCCCTTCTGCTTCCTCCCTTAAAGGGGCCAGTCACCCTGTGACAGCAACATTTCAATT\n//\n//\nContig0:129266-129465;size=1\nAATTTACAAAATATGAAAGGAAACACTTCTTCACATAGCCTGTGGTACGCACTGCCCAGAGGGGCCAGAGAGTCCA------------------------------------------------------------------------------------CCTCAAGCTTCAGGACATCAACTGGTCAGGAAGGAATTT\n//\n//\nContig0:132993-133221;size=2\nAATTAATTCTGTTTTGTGTGTGAATAAAAAATGTGTGTGTTAAAAGAGAAATGTAAAGGCCATCACTGAACCTGAT---------------------------------------------------------------------------------ATTGACGACTCTAAAATATAAGACAGGCCCGTATCAATGAGGACAAAATAGCTGTAATCAAAACCAGCATG\nContig0:132993-133221;size=1\n----AATTCTGTTTTGTGTGTGAATAAAAAATGTGTGTGTTAAAAGAGAAATGTAAAGGCCATCACTGAACCTGATGGTC-----------------------------------------------------------------------------ATTGACGACTCTAAAATATAAGACAGGCCCGTATCAATGAGGACAAAATAGCTGTAATCAAAACCAGCATG\n//\n//\nContig0:135395-135645;size=11\nAATTACCTGGGACTCCTGCCTGGCTGGGGAGATCTGCTAGGCAATCAGATCCCACCTTCAGCACAGGGTATTCTAA-------------------------------------------------------------------------------------------------------CCGGAGGTGGCAAGCTTTACCCACCCCGTAATCAGCGCTGCGGCAAGCTGCATTATACTCCCAGAATCATG\nContig0:135395-135645;size=1\nAATTACCTGGGACTCCTGCCTGGCTGGGGAGATCTGCTAGGCAATCAGATCCCACCTTCAGCACAGGGTATT-----------------------------------------------------------------------------------------------------------CCGGAGGTGGCAAGCTTTACCCACCCCGTAATCAGCGCTGCGGCAAGCTGCATTATACTCCCAGAATCATG\nContig0:135395-135645;size=1\nAATTACCTGGGACTCCTGCCTGGCTGGGGAGATCTGCTAGGCAATCAGATCCCACCTTCAGCACAGGGTATTCTAA---------------------------------------------------------------------------------------------------------------------------------CGTAATCAGCGCTGCGGCAAGCTGCATTATCCTCCCAGAATCATG\nContig0:135395-135645;size=1\nAATTACCTGGGACTCCTGCCTGGCTGGGGAGATCTGCTAGGCAATCAGATCCCACCTTCAGCACAGGG-----------------------------------------------------------------------------------------------------------------GGAGGTCGCAAGCTTTACCCACCCCGTAATCAGCGCTGCGGCAAGCTGCATTATACTCCCAGAATCATG\n//\n//\nContig0:137333-137570;size=2\nCATGGTGTTCTATCTGCTGAGCGTGTTCTGAGGCTTTTCAACCTAAAAAGTAAGTAAAACTGGAAATG---------------------------------------------------------------------------------------------ACCTGCGTATTTTACCCTTCTCTCCCCAAAATCACGTCTGAACAAAACCCATGGAATGCTCAATACATAGTCAATT\n//\n//\nContig0:137831-137897;size=1\nCATGTTTCCTAGCAACCTTAAACTTCAAGTTGACTAAGTTTGTTGTCTGGAAACCTACTATAAATT\n//\n//\nContig0:139289-139543;size=1\nAATTGAACCTGGCTTGACTGAGTTCTAGGCTCATGCCCCAGCCTCTGCACTGTGCTCCTTCTTACCAAGAGCCATC-----------------------------------------------------------------------------------------------------------TACGGTCATGTCGTACGTAGGGCTCTGTCCTGCTCCCACTGCAGCCAGTGGCAAACTCCCGTCACTTCATG\n//\n//\nContig0:140146-140422;size=1\nAATTCATTTGAATATAAATACTGTACTTACAAGTCAGTGTGTAGAGCAGTATAAACAGCTCATTGTCTGTATGAAG---------------------------------------------------------------------------------------------------------------------------------TGAAGAACCACTGCTTTAAAGGGCAGTGTGGGAGGTTGGAAAGTTGTATGATTGTTAGGAGGGACAGAGCA\n//\n//\nContig0:141889-142575;size=3\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGGGATCTGAAAGGCTGTGGCAGCCAGAGAAAGAGGCGACTTTCCCCAGCTCCAGGGCTGCAGCTGCTGGGG---------------------------------------------------------------------------------------------------GAGGCATGGAAGAGAGGGCACATCCATTGCATTAGAAAGGCAAAACTACTGATATGAAAATATGAGTCATG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:141889-142575;size=1\nCATGCTCCATTCAGGTGGNCCGCGGACAGTTCCCTCTAAGACACGCGCCTGTGTGGCTGCACACGAGAGAATAAAGGACCACCCACTTAATT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:141889-142575;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGGGATCTGAAAGGCTGTGGCAGCCAGAGAAAGAGGCGACTTTCCCCA---------------------------------------------------------------------------------------------------------------------------GAGGCATGGAAGAGAGGGCACATCCATTGCATTAGAAAGGCAAAACTACTGATATGAAAATATGAGTCATG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:141889-142575;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGGGATCTGAAAGGCTGTGGCAGCCAGAGAAAGAAGCGACTTTCCCCAGCTCCAGGGCTGCAGCTGCTGGGG---------------------------------------------------------------------------------------------------GAGGCATGGAAGAGAGGGCACATCCATTGCATTAGAAAGGCAAAACTACTGATATGAAAATATGAGTCATG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:141889-142575;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTGGGATCTGAAAGGCTGTGGCAGCCAGAGAAAGAGGCGACTTTCCCCAGCTCCAGGGCTGCAGCTGCTGGGG---------------------------------------------------------------------------------------------------GAGGCATGGAAGAGAGGGCACATCCATTGCATTAGAAACGCAAAACTACTGATATGAAAATATGAGTCATG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:141889-142575;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATTTTCAAGTGGTCTGTGGAAAAAAGAAACTGAGAACCAGTGCATTAGAGGAAGACATAGAAGAAGGTTCAGAGTTTGGAGCAGAAGACGATTCCATGCATG\n//\n//\nContig0:143165-143277;size=1\nCATGGCTGGTGGAGGATGCTCAGTAACACCTAATACCTCTGCTGGCAGATATTGTTAACCCCATCTTTAGAGAGGGAAATCTGCCCTAAAGCCTGCCACATTTTGACCAATT\n//\n//\nContig0:143580-143810;size=8\nAATTTTTCTTCTGGTTTGTCACAGTTTGCTAATATTCCTGCTCAGAGGGAAAAAAAAAGTCATACTTTAGACTTTG-----------------------------------------------------------------------------------CTTCCAACTCTTACCTGCTGCTAATCTTTGTTCAAACGTTGATGGCTCCAGGACGTGCCTTGGCCAACATG\nContig0:143580-143810;size=1\nAATTTTTCTTCTGGTTTGTCACAGTTTGCTAATATTCCTGCTCAGAGGGAAAAAAAA-------------------------------------------------------------------------------------------------------------------------GCTAATCTTTGTTCAAACGTTGATGGCTCCAGGACGTGCCTTGGCCAACATG\nContig0:143580-143810;size=1\nAATTTTTCTTCTGGTTTGTCACAGTTTGCTAATATTCCTGCTCAGAGGGAAAAAAAAAGTCATACTTTAGACTTTG------------------------------------------------------------------------------------------------CCTGCTGCTAATCTTTGTTCAAACGTTGATGTCTCCAGGACGTGCCTTGGCCAACATG\nContig0:143580-143810;size=1\nAATTTTTCTTCTGGTTTGTCACAGTTTGCTAATATTCCTGCTCAGAGGGAAAAAAAAAGCCATACTTTAGACTTT------------------------------------------------------------------------------------CTTCCAACTCTTACCTGCTGCTAATCTTTGTTCAAACGTTGATGGCTCCAGGACGTGCCTTGGCCAACATG\n//\n//\nContig0:145164-145418;size=5\nAATTTAACGTCCATTAGACATTCTGAAGGGGTAACGCTGAGTTAGACATTGGGTGCTGGTATTGTGATGGGGAAAG-----------------------------------------------------------------------------------------------------------CATCTGACAAATGGTCCCAGTGCCCTCCGAGAGGTGGAGAAGAATACGAATATTTAACATTTATAATCATG\nContig0:145164-145418;size=1\nAATTTAACGTCCATTAGACATTCTGAAGGGGAAACGCTGAGTTAGACATTGGGTGCTGGTATTGTGAT------------------------------------------------------------------------------------------------------------------------GACAAATGGTCCCAGTGCCCTCCGAGAGGTGGAGAAGAATACGAATATTTAACATTTATAATCATG\nContig0:145164-145418;size=1\nAATTTAACGTCAATTAGACATTCTGAAGGGGTAACGCTGAGTTAGACATTGGGCGCTGGT---------------------------------------------------------------------------------------------------------------------------CATCTGACAAATGGTCCCAGTGCCATCCGAGAGGTGGAGAAGAATACGAATATTTAACATTTATAATCATG\nContig0:145164-145418;size=1\nAATTTAACGTCCATTAGACATTCTGAAGGGGTAACGCTGAGTTAGACATTGGGTGCTGGTATTGTGATGGGGAAAG----------------------------------------------------------------------------------------------------------------GACAAATGGTCCCAGTGCCCTCCGAGAGGTGGAGAAGAATACGAATATTTAACATTTATAATCATG\nContig0:145164-145418;size=1\nAATTTAACGTCCATTAGACATTCTGAAGGGGTAACGCTGAGTTAGACATTGGGTGCTGGTATTGTGATGGGGAAAG-----------------------------------------------------------------------------------------------------------CATCTGACAAATGGTCCCAGTGCCCTCCGGGAGGTGGAGAAGAATACGAATATTTAACATTTATAATCATG\n//\n//\nContig0:145642-145890;size=1\nCATGGGCAGCGGACGTGTGTTGGCGTGGCTCTCTGCTGTGTACCAGGCCTTATTAACACAAAACGGTACGC-----------------------------------------------------------------------------------------------------TGTCAAATAGCAACAGGGCTGACCCAAAACCCATGTCCAAACATCCAAGAACTTCACAAAAGTTCAACTCTCAATT\nContig0:145642-145890;size=1\nCATGGGCAGCGGACGTGTGTTGGCGTGGCTCTCTGCTGTGTACCAGGCCTTATTAACACACAACGGTACGC-----------------------------------------------------------------------------------------------------TGTCAAATAGCAACAGGGCTGACCCAAAACCCATGTCCAAACATCCAAGAACTTCACAAAAGTTCAACTCTCAATT\nContig0:145642-145890;size=1\nCATGGGCAGCGGACGTGTGTTGGCGTGGCTCTCTGCTGTGTACCAGGCCTTATTAACACAAAACGGTACGC--------------------------------------------------------------------------------------------------------CAAATAGCAACAGGGCTGACCCAAAACCCATGTCCAAACATCCAAGAACTTCACAAAAGTTCAACTCTCAATT\nContig0:145642-145890;size=1\nCATGGGCAGCGGACGTGTGTTGGCGTGGCTCTCTGCTGTGTACCAGGCCTTATTAACACAAAACGGTACGC-------------------------------------------------------------------------------------------------AAGGTGTCAAATAGCAACAGGGCTGACCCAAAACCCATG-----------------------------------------\n//\n//\nContig0:146117-146349;size=1\nAATTAAACACGGGGTAAGCCACTGCCATGCTCTGACCCAAAGGTGGCTGGTGTATTTTCAGTAGCGGGAATGCACG-------------------------------------------------------------------------------------CCTAGAGCCCTGGGCCAGAGCAAGTGAGGGCCCCCCCACCCCCTCCAGTCCTGCCTCCAGCCCTGTTCATG\nContig0:146117-146349;size=1\nAATTAAACACGGGGTAAGCCACTGCCATGCTCTGACCCAAAGGTGGCTGGGGTATTTTCAGTAGCGGGAATGCACG-------------------------------------------------------------------------------------CCTAGAGCCCTGGGCCAGAGCAAGTGAGGGCCCCCCCACCCCCTCCAGTCCTGCCTCCAGCCCTGTTCATG\n//\n//\nContig0:148378-148953;size=2\nCATGCTGCAGCTAGGGGGAAATGGGGGCAGGGCCGGGGGAACCTCACCCTCCCCAGCTGCGAATCCTGGGA---------CGATCCGACCCGCGGACTGGAGTTCTTTACCCACCCCCTGGCCCTTTAACAACCGGTTCTCCACGGAGGTCTAATT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:148378-148953;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTCCTGGTTCACTTCTTCGTCTCTTATAACTTTTGTATTGAGTGCAATACATAGGGCTTGCAAAAATG---------------------------------------------------------------------TCTGGCTGTCCTGTGGAGATTTACAAACCAACCTGCTTCATTGACAGTGCAGTGGGGTTTTCTTCTGTTTCAAATT\nContig0:148378-148953;size=1\n-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGTCCTGGTTCACTTCTTCGTCTCTTATAACTTTTGTATTGAGTGCAATACATAGGGCTTGCAAAAATG------------------------------------------------------------------------------------------------------------ATTGACAGTGCAGTGGGGTTTTCTTCTGTTTCAAATT\n//\n//\nContig0:149397-149596;size=2\nAATTTGAGTTCAGCTGTATTTGCAAAGAGGTGAAACACTCCAAACCAAAATGAAACATTTCGTTTGACAGAACTGT----------------------------------------------------CTCAGCCAACCAACTGAAAAATCTGTTATTTTAACAGCTCTACCCAGGAAGCAAGAGCCAGAGCAGGCATG\n//\n//\nContig0:150178-150481;size=1\nAATTCTTCGGAAGAGCCTTTTTTTCTAACCTGTGATGCAAGCTGGTCCATTTCAAACACCAGGTATCTCCAAAACAATCCCCACAGGCAGCTACCCCCCTCCAAGATCTCCAAGCACCACTTCAAATAGCGACATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:150178-150481;size=1\nAATTCTTCGGAAGAGCCTTTTTTTCTAACCTGTGATGCAAGCTGG-----------------------CCAAAACAATCCCCACAGGCAGCTACCCCCCTCCAAGATCTCCAAGCACCACTTCAAATAGCGACATG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:150178-150481;size=1\n------------------------------------------------------------------------------------------------------------------------------------CATGTGGAATCCTGTGCTATCAATCTGCTCCTCAAAAGAGCCCCCATGAACAAAGCAGAGATGTTCCTAGC------------------------TAGCAAATCCCAGCCTGGCCCTGGGTTCAGACAAGCTCAAACCAGCACAATACAAAGAGAAGAGCAAAGTCTAATT\n//\n//\nContig0:150767-151006;size=17\nAATTTGATACCATATTCTTCTCTATTTTCTGTCCTCTGTGCACCAAAATCCCACCAAAGCAGGATGTTTGGGGGGT--------------------------------------------------------------------------------------------GTAGATGTGTTTTGGTTGCAGGGAGGCATTCTCTGGGGACATACACATAGGAATGCCTGTATGTCTACATG\nContig0:150767-151006;size=1\nAATTTGATACCATATTCTTCTCTATTTTCTGTCCTCTGTGCACCAAAATCCCACCAAAGCAGGATGTTTGGGGGG---------------------------------------------------------------------------------------------GTAGATGTGTTTTGGTTGCAGGGAGGCATTCTCTGGGGACATACACATAGGAATGCCTGTATGTCTACATG\n//\n//\nContig0:151911-152069;size=1\nAAATTGCATCGAAGAGCACAGAGGCAGGG----------------------------------------------------------TAAACCTACGCTAATGAACACAAATATAGAAGAGAAGGATCACCCAGCAGTGTTAAGGAAGCCGCCGCATG\n//\n//\nContig0:152674-152906;size=7\nAATTTCCCAGCTTCTGGGGAGTCCTGGAGAGGAGGAAGAATCCGGATCCCTTTGCAGACGGCATTGCTGCTGCACT----------------------------------------------------------------------------------------CTCTGAAGAAGGGCCAGGGGTCAGGGGTGAAAGTATGGGCTTCGTTGTGAGGAAGTGTCCCTGCCATG\nContig0:152674-152906;size=1\nAATTTCCCAGCTTCTGGGGAGTCCTGGAGAGGAGGAAGAATCCGGATCCCTTTGCAGACGGCATTGCTGCTGCACT----------------------------------------------------------------------------------------CTCTGAAGAAGGGCAAGGGGTCAGGGGTGAAAGTATGGGCTTCGTTGTGAGGAAGTGTCCCTGCCATG\n//\n//\nContig0:153152-153312;size=1\nCATGTACGTCGCACGACTGAATCAATGGGGCTCCTCACCCACTTAAAGGGAATCGTGGGGTTCAGTGCTTT-------------AGGTTTGTTTTGCCACCTCCTGGACTGGCACCTACGAGTCTAAGGCTAAATATTATTTCTAGTAGGGCTGCCAATT\n//\n//\nContig0:153761-154003;size=17\nCATGGCCTCCTCCAAACACCTTCTTTATTCTCACCACAGGACCTTCCTCCTGGTGTCTGATAATGCTTGTA-----------------------------------------------------------------------------------------------CTTTTTAAACCCAGGTGCCCTGATTAGCCTGCTTTGATTGGCTGGAGGTGATCTAATCAGCCTGTCTGCCTTAATT\nContig0:153761-154003;size=1\nCATGGCCTCCTCCAAACACCTTCTTTATTCTCACCACAGGACCTTCCTCCTGGTGTCTGATAATGCTTGTA-----------------------------------------------------------------------------------------------CTTTTTAAACCCAGGTGCCCTGATGAGCCTGCTTTGATTGGCTGGAGGTGATCTAATCAGCCTGTCTGCCTTAATT\nContig0:153761-154003;size=1\nCATGGCCTCCTCCAAACACCTTCTTTATTCTCACCACAGGACCTTCCTCCTGG-------------------------------------------------------------------------------------------------------------------------------------------------TTTGATTGGCTGGAGGGGATCTAATCAGCCTGTCTGCCTTAATT\nContig0:153761-154003;size=1\nCATGGCCTCCTCCAAACACCTTCTTTATTCTCACCACAGGACCTTCCTCCTGGTGTCTGATAATGCTTGTA------------------------------------------------------------------------------------------------TTTTTAAACCCAGGTGCCCTGATTAGCCTGCTTTGATTGGCTGGAGGTGATCTAATCAGCCTGTCTGCCTTAATT\nContig0:153761-154003;size=1\nCATGGCCTCCTCCAAACACCTTCTTTATCCTCACCACAGGACCTTCCTCCTGGTGTCTGAT-------------------------------------------------------------------------------------------------------------------------------------------TGATTAGCTGGAGGTGATCTAATCAGCCGGTGTGCTTTAATT\nContig0:153761-154003;size=1\nCATAGCCTCCTCCAAACACCTTCTTTATTCTCACCACAGGACCTTCCTCCTGGTGTCTGATAATGCTTGTA-----------------------------------------------------------------------------------------------CTTTTTAAACCCAGGTGCCCTGATTAGCCTGCTTTGATTGGCTGGAGGTGATCTAATCAGCCTGTCTGCCTTAATT\nContig0:153761-154003;size=1\nCATGGCCTCCTCCAAACACCTTCTTTATTCTCACCACAGGACCTTCCTCCTGGTGTCTGATAATGCTTGTA--------------------------------------------------------------------------------------------------TTTAAACCCAGGTGCCCTGATTACCCTGCTTTGATTGGCTGCAGGTGTTCTAATCAGCCAGTCTGCCTTAATT\n//\n//\nContig0:155102-155187;size=1\nAATTCAGTTCCCCATCACCTGGGGCCAGGACTCTTCTGGGCAGCTTCAGCAGGGACGGGAGGCCAGAGACTGTGTGTGGTCCATG\n//\n//\nContig0:157369-157580;size=1\nAATTCCTGGCTCTGCTACCGTCTAGCTGTGTGATGGTGCA-----------------------------------------------------------------------------------------------TCCTCTGATGAGAGATGCTATTTCTTAGCTCCTACATATTAGCCAAAGGCTATTTAACCCACTTTCCAGTGTAATT\n//\n//\nContig0:158525-158758;size=7\nAATTAGATCTTGTGCACAATGAGACAGGACGAGAACCTGGGCCTTTCCAGTCCAGAGACACAAGCCTCCACCACCA--------------------------------------------------------------------------------------ACCCCAGCAGACACGCAGGGCTTTGCTGCAACTGCGCCCCTTGGCAGCTGACATTGCTTATGTCTAACATG\nContig0:158525-158758;size=3\nAATTAGATCTTGTGCACAATGAGACAGGACGAGAACCTGGGCCTTTCCAGTCCAGAGACACAAGCCTCCACCACCA---------------------------------------------------------------------------------------CCCCAGCAGACACGCAGGGCTTTGCTGCAACTGCGCCCCTTGGCAGCTGACATTGCTTATGTCTAACATG\nContig0:158525-158758;size=1\nAATTAGATCTTGTGCACAATTAGACAGGACGAGAACCTGGGCCTTTCCAGTCCAGAGACACA----------------------------------------------------------------------------------------------------ACCCCAGCAGACACGCAGGGCTTTGCTGCAACTGCGCCCCTTGGCAGCTGACATTGCTTATGTCTAACATG\n//\n//\nContig0:160325-160508;size=1\nAATTTGCAAGGCAGGGAGCT--CAGTGTCTGCTCCAAAAATCCGCGCTCTCTGTCTCCCCGATGCTCCCTGTCACACT-----------------------------------------------------TAGCTGCCCACAATGCACCACTCCCAACAGCGCTGCAAATGTGGCCACACTT\n//\n//\nContig0:161569-161784;size=1\nAATTGAAATAGCAAGGAGGGTGCTCAGTGACGGTGGGCATGATATAAAAACCCAAACAGATCGGAAGTCAGTGGCA--------------------------------------------------------------------CCTGCATCAAAAAGAAGAGACTTCCTGAAATAGACAAGTGTCTCAGCAATGAGACAGGCTCAGAGAACATG\n//\n//\nContig0:163234-163571;size=1\nAATTCTGGCTCATTGAACTTCATTTACTCATTGCTGCAATCCCAAGCCTTCAAAAATCATG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nContig0:163234-163571;size=1\n-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CATGGCTGCAGTTTAACTTTTTTGTTTTGAAAGTAAAGCAGTGATTCTCCTGCAATCTTGTGGGGCCTTAAGACAGACATCAAACATCCTGAGATTCGCAATAAGAACTCAATT\n//\n//\nContig0:165831-166037;size=1\nAATTTCCTGTCCTCCTTCTACCCAAAGCTTCCCCTCCCCACAACGTGCCTCAGGCCCAAAGAATGACCTCGGAAGC-----------------------------------------------------------CCGTGGACTGCCCCAGTGGAGGCATGACAAGATGGACCGTGCGAGTTCCCCCTTTTCTGTGCTGCCCCATG\nContig0:165831-166037;size=1\nAATTTCCTGTCCTCCTTCTACCCAAAGCTTCCCCTCCCCACAACGTGCCTCAGGCCCAAAGAATGACCTCGGAAGCATG-------------------------------------------------------------------------------------------------------------------------------\n//\n//\nContig0:169197-169391;size=1\nAATTAGCAAGGAGGTTTGTCCTCTTGCCCCCCAGGGGCATCTTTGTTGGGGCTGGTTCAGACTGATTGCATAA------------------------------------------------------CCCAGTGACCTCCAAGCACTTTACAAACCTGCCTCAGGGGCCTTACACAGGAGGAAAAGGAGACATG\nContig0:169197-169391;size=1\nAATTAGCAAGGAGGTTTGTCCTCTTGCCCCCCAGGGGCATCTTTGTTGGGGCTGGTTCAGACTGATTGCATAAGCT-----------------------------------------------TTCACCCAGTGACCTCCAAGCACTTTACAAACCTGCCTCAGGGGCCTTACACAGGAGGAAAAGGAGACATG'
regions = (i.split("\t") for i in fullregions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
build_ref_cigars(data, sample)
reg = ('Contig0', 41920, 42479)
reg = ('Contig0', 7219468, 7220326)
reg = ('Contig0', 207998, 208219)
reg = ('Contig0', 16738, 16933)
reg = ('Contig0', 49781, 50005)
reg = ('Contig0', 76480, 76711)
reg = ('Contig0', 24391, 24908)
reg = ('Contig0', 7193, 7952) # has merge
#reg = ('Contig0', 13327, 13845) # has indels
reg = ('Contig0', 76480, 77015) # has indels
reg = ('Contig0', 346131, 346193) # valueerror
samfile = AlignmentFile(
os.path.join(data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name)),
'rb')
ref = get_ref_region(data.paramsdict["reference_sequence"], *reg)
reads = list(samfile.fetch(*reg))
def cigared(sequence, cigartups):
start = 0
seq = ""
for tup in cigartups:
flag, add = tup
if flag is 0:
seq += sequence[start:start + add]
if flag is 1:
pass
if flag is 2:
seq += "-" * add
start -= add
if flag is 4:
pass
start += add
return seq
# match paired reads together in a dictionary
rdict = {}
for read in reads:
if read.qname not in rdict:
rdict[read.qname] = [read]
else:
rdict[read.qname].append(read)
# sort keys by derep number
keys = sorted(
rdict.keys(),
key=lambda x: int(x.split("=")[-1]), reverse=True)
# build the cluster based on map positions, orientation, cigar
for key in keys:
r1, r2 = rdict[key]
if r1 and r2:
print(r1.seq, r1.cigar)# r1.pos, r1.aend, r1.rlen, r1.aligned_pairs)
print(cigared(r1.seq, r1.cigar))
CATGTTCAGTGTAGTATGAAGCGGGGTAGCACTTTCCACTGCAGAAGTAGGTCTCATTGTGTGCGTAGTGC [(4, 15), (0, 25), (1, 3), (0, 16), (1, 1), (0, 5), (1, 1), (0, 5)] (4, 15) 15 15 (0, 25) 40 40 ATGAAGCGGGGTAGCACTTTCCACT (1, 3) 43 43 ATGAAGCGGGGTAGCACTTTCCACT (0, 16) 59 59 ATGAAGCGGGGTAGCACTTTCCACTGAAGTAGGTCTCATTG (1, 1) 60 60 ATGAAGCGGGGTAGCACTTTCCACTGAAGTAGGTCTCATTG (0, 5) 65 65 ATGAAGCGGGGTAGCACTTTCCACTGAAGTAGGTCTCATTGGTGCG (1, 1) 66 66 ATGAAGCGGGGTAGCACTTTCCACTGAAGTAGGTCTCATTGGTGCG (0, 5) 71 71 ATGAAGCGGGGTAGCACTTTCCACTGAAGTAGGTCTCATTGGTGCGAGTGC ATGAAGCGGGGTAGCACTTTCCACTGAAGTAGGTCTCATTGGTGCGAGTGC
out = open("test2.txt", 'w')
# build the cluster based on map positions, orientation, cigar
for key in keys:
r1, r2 = rdict[key]
if r1 and r2:
# empty arrays
aref = np.array(list(ref[1]))
arr1 = np.zeros(aref.size, dtype="U1")
arr2 = np.zeros(aref.size, dtype="U1")
arr1.fill("-")
arr2.fill("-")
# how far ahead of the start does this read begin
start = r1.reference_start - reg[1]
seq = cigared(r1.seq, r1.cigar)
arr1[start:start+len(seq)] = list(seq)
seq = cigared(r2.seq, r2.cigar)
start = r2.reference_start - reg[1]
arr2[start:start+len(seq)] = list(seq)
#print("".join(arr1), file=out)
#print("".join(arr2), file=out)
arr3 = join_arrays(arr1, arr2)
print("".join(arr3), file=out)
out.close()
import numba
#numba.jit(nopython=True)
def join_arrays(arr1, arr2):
arr3 = np.zeros(arr1.size, dtype="U1")
for i in range(arr1.size):
if arr1[i] == arr2[i]:
arr3[i] = arr1[i]
elif arr1[i] == "N":
if arr2[i] == "-":
arr3[i] = "N"
else:
arr3[i] = arr2[i]
elif arr2[i] == "N":
if arr1[i] == "-":
arr3[i] = "N"
else:
arr3[i] = arr1[i]
elif arr1[i] == "-":
if arr2[i] == "N":
arr3[i] = "N"
else:
arr3[i] = arr2[i]
elif arr2[i] == "-":
if arr1[i] == "N":
arr3[i] = "N"
else:
arr3[i] = arr1[i]
else:
arr3[i] = "N"
return arr3
a1 = np.array(list("AGAGAG-NN----"))
a2 = np.array(list("----ACTTNNTTT"))
de = np.array(list("AGAGANTTNNTTT"))
a1 = np.array(list("AGAGAG-NN-------"))
a2 = np.array(list("----ACTTNNTTT---"))
de = np.array(list("AGAGANTTNNTTT---"))
join_arrays(a1, a2)
array(['A', 'G', 'A', 'G', 'A', 'N', 'T', 'T', 'N', 'N', 'T', 'T', 'T', '-', '-', '-'], dtype='<U1')
# how far ahead of the start does this read begin
start = r1.reference_start - reg[1]
552
aref = np.array(list(ref[1]))
arr1 = np.zeros(aref.size, dtype="U1")
arr2 = np.zeros(aref.size, dtype="U1")
arr1.fill("-")
r1.reference_start, r1.pos, r1.reference_end, r1.cigar
(7745, 7745, 7821, [(0, 76)])
cigared(r1.seq, r1.cigar)
'AATTAACATTGCTGTGCGATTCCTGGACCACTGCAGAATTCAGTGAAATCTAGGGGTCTTGGCTGCCTAAATACAT'
# which is forward and reverse
if r1.blocks[0][0] > r2.blocks[0][0]:
rx = r2
r2 = r1
r1 = rx
# get arrs
aref = np.array(list(ref[1]))
arr1 = np.zeros(aref.size, dtype="U1")
arr2 = np.zeros(aref.size, dtype="U1")
# fill arr1
seq1 = cigared(r1.seq, r1.cigar)
arr1[]
array(['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''], dtype='<U1')
ref = get_ref_region(data.paramsdict["reference"], *reg)
def cigar(r1, r2, reg):
# which is forward and reverse
if r1.blocks[0][0] > r2.blocks[0][0]:
rx = r2
r2 = r1
r1 = rx
# get arrs
aref = np.array(list(ref[1]))
arr1 = np.zeros(aref.size, dtype="U1")
arr2 = np.zeros(aref.size, dtype="U1")
# fill arr1
# do they overlap?
overlap = False
if max(r1.blocks[0]) > min(r2.blocks[0]):
overlap = True
#osegment = r1.blocks[0][1] - r2.blocks[0][0]
#oseqs = r1.seq[-osegment:], r2.seq[:osegment]
#print(oseqs)
# modify for cigar
for edit in r1.cigar:
r1.seq
before = "-" * (r1.pos - reg[1])
if r1.is_reverse:
read = before + revcomp(r1.seq)
else:
read = before + r1.seq
midns = "-" * (r2.pos - r1.aend)
read += midns
if r2.is_reverse:
read += r2.seq
else:
read += revcomp(r2.seq)
after = "-" * (reg[2] - r2.aend)
read += after
return read
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-197-82b00e79f3db> in <module>() ----> 1 ref = get_ref_region(data.paramsdict["reference"], *reg) 2 3 def cigar(r1, r2, reg): 4 5 # which is forward and reverse KeyError: 'reference'
def get_ref_region(reference, contig, rstart, rend):
cmd = [
ip.bins.samtools, 'faidx',
reference,
"{}:{}-{}".format(contig, rstart+1, rend)]
stdout, err = sps.Popen(cmd, stdout=sps.PIPE).communicate()
name, seq = stdout.decode().split("\n", 1)
listseq = [name, seq.replace("\n", "")]
return listseq
ireg = samfile.fetch(*reg)
rdict = {}fo
for read in ireg:
if read.qname not in rdict:
rdict[read.qname] = [read]
else:
rdict[read.qname].append(read)
clust = dict_to_clust(rdict)
clust
['AATTTATTATGAAACAAAAGGCAAAAAACTGTTATGTACATAGTTTAGTCCTATTGAGTGTCTACTCAGCGCTnnnnCGCTCACTGCTCCGCAGTTCAGTGGAAAAAAATCTAGATGAGAGTGCAAAAAATGTATCTTTAGTGACATG', 'AATTTAGACCTATTCGGTGTCTACTCAGCGCTTCTTGGCTTGTCTTTTGTATTTATTTAAnnnnGCTCCGCAGTTCGGTGGAAAAAAATCTAGATGAGAGTGCAAAAAATGTATCTTTAGTGACATG']
ref = get_ref_region(*reg)
ref
['>Contig0:24392-24679', 'AATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACATAGTTTAGTCCTATTCAGTGTCTACTCAGCGCTTCTTGGCTTGTCTCTTGTATTCATTAAATGGAGCATCTCTTGTCACTGTCCAGCAATAGTCTGCAAGCATTGATGGGCTCCATTTGCCCTGATAGCGTTTCTCCATTGTTGCAATGTCCTGGTGAAATCGCTCGCCGTGCTCGTCGCTCACTGCTCCGCAGTTCGGTGGAAAAAAATCTAGATGAGAGTGCAAAAAATGTATCTTTAGTGACATG']
print(ref[1][:80])
print(r1.seq)
AATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACATAGTTTAGTCCTATTCAGTGTCTACTCAGCGCTTCTTGGC AATTTAGACCTATTCGGTGTCTACTCAGCGCTTCTTGGCTTGTCTTTTGTATTTATTTAA
# how for ahead of reference is r1 start
ahead = -1 * (reg[1] - (r1.pos - 1))
print(ref[1][:90])
print("-"*ahead, r1.seq)
AATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACATAGTTTAGTCCTATTCAGTGTCTACTCAGCGCTTCTTGGCTTGTCTCTTG ---------------------------------------- AATTTAGACCTATTCGGTGTCTACTCAGCGCTTCTTGGCTTGTCTTTTGTATTTATTTAA
print('r', ref[1][:90])
for key in rdict:
r1, r2 = rdict[key]
ahead = -1 * (reg[1] - (r1.pos))
print('h', ("-"*ahead + r1.seq)[:90])
#print(r1.cigar)
print('r', ref[1][-90:])
for key in rdict:
r1, r2 = rdict[key]
ahead = (reg[2] - (r2.pos))
print(reg[2], r2.pos, ahead)
print('h', ("-"*ahead))
#print(r1.cigar)
#print(r2.seq)
#print(r2.get_blocks())
#print(r1.seq)
#print(r1.get_blocks()[0])
#print(r1.aend, r2.get_blocks()[0][0])
#print(r1.cigar)
print(r2.cigarstring)
r AATTTATTATGAAACAAAAGGCAAAAAACTATTATGTACATAGTTTAGTCCTATTCAGTGTCTACTCAGCGCTTCTTGGCTTGTCTCTTG h AATTTATTATGAAACAAAAGGCAAAAAACTGTTATGTACATAGTTTAGTCCTATTGAGTGTCTACTCAGCGCT h -----------------------------------------AATTTAGACCTATTCGGTGTCTACTCAGCGCTTCTTGGCTTGTCTTTTG r AATCGCTCGCCGTGCTCGTCGCTCACTGCTCCGCAGTTCGGTGGAAAAAAATCTAGATGAGAGTGCAAAAAATGTATCTTTAGTGACATG 24679 24608 71 h ----------------------------------------------------------------------- 71M 24679 24616 63 h --------------------------------------------------------------- 63M
for read in clust:
print(read)
AATTCAGAACTGCCAGTGAACCAACCCCCCATCCATTATTTGCACAACTCTACTTAAGACTTCAGGCCGCAAAGGTnnnnAGCAGTATAGGGCCTATGACACTCAGTGGTGCAGTTCTGATTCCATCCAGCAGAGGACAGTGCTGACCATG AATTCAGAACTGCCAGTGAACCAACCCCCCATCCATTATTTGCACAACTCTACTTAAGACTTCAGGCCGCAGAGGTnnnnGGGCCTATGACACTCAGTGGTGCAGTTCTGATTCCATCCAGCAGAGGACAGTGCTGACCATG AATTCAGAACTGCCAGTGAACCAACCCCCCATCCATTATTTGCACAACTCTACTTAAGACTTCAGGCCGCAAAGGnnnnAGCAGTATAGGGCCTATGACACTCAGTGGTGCAGTTCTGATTCCATCCAGCAGAGGACAGTGCTGACCATG
# access reads from bam file using pysam
samfile = AlignmentFile(
os.path.join(data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name)),
'rb')
# iterate over all mapped regions
clusters = []
for reg in regions:
ireg = samfile.fetch(*reg)
# match paired reads by read names for all reads in each cluster
rdict = {}
for read in ireg:
if read.qname not in rdict:
rdict[read.qname] = [read]
else:
rdict[read.qname].append(read)
clust = dict_to_clust(rdict)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-30-b830d6272d8d> in <module>() 18 rdict[read.qname].append(read) 19 ---> 20 clust = dict_to_clust(rdict) <ipython-input-27-17187aba4e6c> in dict_to_clust(rdict) 2 clust = [] 3 for pair in rdict.values(): ----> 4 r1, r2 = pair 5 poss = r1.get_reference_positions() + r2.get_reference_positions() 6 seedstart, seedend = min(poss), max(poss) ValueError: not enough values to unpack (expected 2, got 1)
rdict
{'1c64525c1f5f58dc1f713b0e5e1d0941;size=1': [<pysam.libcalignedsegment.AlignedSegment at 0x7f0a9fae5ac8>]}
from ipyrad.assemble.util import revcomp
def build_ref_clusters_from_cigars(data, sample):
# get regions from bedtools overlaps
regions = bedtools_merge(data, sample).strip().split("\n")
regions = (i.split("\t") for i in regions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
# access reads from bam file using pysam
samfile = AlignmentFile(
os.path.join(data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name)),
'rb')
# iterate over all mapped regions
clusters = []
for reg in regions:
ireg = samfile.fetch(*reg)
# match paired reads by read names for all reads in each cluster
rdict = {}
for read in ireg:
if read.qname not in rdict:
rdict[read.qname] = [read]
else:
rdict[read.qname].append(read)
return dict_to_clust(rdict)
seq = build_ref_clusters_from_cigars(data, sample)
seq
['TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCGCnnnnTGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATGGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG', 'TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCGCnnnnTGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATGGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG', 'TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCGCnnnnTGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATGGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG', 'TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCGCnnnnTGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATGGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG', 'TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCGCnnnnTGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATGGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG']
def dict_to_clust(rdict):
clust = []
for pair in rdict.values():
r1, r2 = pair
poss = r1.get_reference_positions() + r2.get_reference_positions()
seedstart, seedend = min(poss), max(poss)
reads_overlap = False
#print(r1.cigartuples, r1.cigarstring, r1.cigar)
if r1.is_reverse:
if r2.aend > r1.get_blocks()[0][0]:
reads_overlap = True
seq = r2.seq + 'nnnn' + revcomp(r1.seq)
else:
seq = r2.seq + 'nnnn' + r2.seq
else:
if r1.aend > r2.get_blocks()[0][0]:
reads_overlap = True
seq = r1.seq + 'nnnn' + revcomp(r2.seq)
else:
seq = r1.seq + 'nnnn' + r2.seq
clust.append(seq)
return clust
# build clusters for aligning with muscle from the sorted bam file
samfile = AlignmentFile(
os.path.join(data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name)),
'rb')
chrom, rstart, rend = regions[0].split()
reg = samfile.fetch(chrom, int(rstart), int(rend))
for read in reg:
print(rstart)
print(read.seq)
5009 TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCGC 5009 TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCCC 5009 TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGAACCAGCTGCCCCC 5009 TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGCCCCGACCCAGCTGCCCGC 5009 TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCGC 5009 TGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATGGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG 5009 TGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATAGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG 5009 TGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATAGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG 5009 TGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATGGTGCGCGATCCCGCTGCAGTTAACTGGCCTCTTAACCCCG 5009 TGCACTCACTTCCATGAGCGTCTGAAAAGTGACATCTGACTCGTAGGCACGGCCGCTATGGTGCGCGATCCTGCTGCAGTTAACTGGCCTCTTAACCCCG
nonmerged1 = os.path.join(
data.tmpdir,
"{}_nonmerged_R1_.fastq".format(sample.name))
nonmerged2 = os.path.join(
data.tmpdir,
"{}_nonmerged_R2_.fastq".format(sample.name))
edits1 = os.path.join(
data.dirs.edits,
"{}.trimmed_R1_.fq.gz".format(sample.name))
edits2 = os.path.join(
data.dirs.edits,
"{}.trimmed_R2_.fq.gz".format(sample.name))
# file precedence
nonm1 = [i for i in (edits1, nonmerged1) if os.path.exists(i)][-1]
nonm2 = [i for i in (edits2, nonmerged2) if os.path.exists(i)][-1]
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-9-57cd2cd83b65> in <module>() 13 14 # file precedence ---> 15 nonm1 = [i for i in (edits1, nonmerged1) if os.path.exists(i)][-1] 16 nonm2 = [i for i in (edits2, nonmerged2) if os.path.exists(i)][-1] IndexError: list index out of range
edits1
'/home/deren/Documents/ipyrad/tests/4-refpairtest_edits/3L_0.trimmed_R1_.fq.gz'
infiles = [
os.path.join(data.dirs.edits, "{}.trimmed_R1_.fastq.gz".format(sample.name)),
os.path.join(data.dirs.edits, "{}_R1_concatedit.fq.gz".format(sample.name)),
os.path.join(data.tmpdir, "{}_merged.fastq".format(sample.name)),
os.path.join(data.tmpdir, "{}_declone.fastq".format(sample.name)),
]
infiles = [i for i in infiles if os.path.exists(i)]
infile = infiles[-1]
split_endtoend_reads(data, sample)
with open(inp, 'r') as infile:
duo = izip(*[iter(infile)] * 2)
idx = 0
while 1:
try:
itera = next(duo)
except StopIteration:
break
r1, r2 = itera[1].split("nnnn")
def split_endtoend_reads(data, sample):
"""
Takes R1nnnnR2 derep reads from paired data and splits it back into
separate R1 and R2 parts for read mapping.
"""
inp = os.path.join(data.tmpdir, "{}_derep.fastq".format(sample.name))
out1 = os.path.join(data.tmpdir, "{}_R1-tmp.fastq".format(sample.name))
out2 = os.path.join(data.tmpdir, "{}_R2-tmp.fastq".format(sample.name))
splitderep1 = open(out1, 'w')
splitderep2 = open(out2, 'w')
with open(inp, 'r') as infile:
# Read in the infile two lines at a time: (seqname, sequence)
duo = izip(*[iter(infile)] * 2)
## lists for storing results until ready to write
split1s = []
split2s = []
## iterate over input splitting, saving, and writing.
idx = 0
while 1:
try:
itera = next(duo)
except StopIteration:
break
## split the duo into separate parts and inc counter
part1, part2 = itera[1].split("nnnn")
idx += 1
## R1 needs a newline, but R2 inherits it from the original file
## store parts in lists until ready to write
split1s.append("{}{}\n".format(itera[0], part1))
split2s.append("{}{}".format(itera[0], part2))
## if large enough then write to file
if not idx % 10000:
splitderep1.write("".join(split1s))
splitderep2.write("".join(split2s))
split1s = []
split2s = []
## write final chunk if there is any
if any(split1s):
splitderep1.write("".join(split1s))
splitderep2.write("".join(split2s))
## close handles
splitderep1.close()
splitderep2.close()
merge_pairs(data, sample, 1, 1)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-10-15da25c95fd0> in <module>() ----> 1 mergepairs(data, sample, 1, 1) NameError: name 'mergepairs' is not defined
s3.data.samples["1A_0"].concatfiles
[('/home/deren/Documents/ipyrad/tests/4-refpairtest_edits/1A_0.trimmed_R1_.fastq.gz', '/home/deren/Documents/ipyrad/tests/4-refpairtest_edits/1A_0.trimmed_R2_.fastq.gz')]
s3.run()
[####################] 100% 0:00:00 | concatenating | s3 | [####################] 100% 0:00:04 | mapping | s3 |
def bedtools_merge(data, sample):
"""
Get all contiguous genomic regions with one or more overlapping
reads. This is the shell command we'll eventually run
bedtools bamtobed -i 1A_0.sorted.bam | bedtools merge [-d 100]
-i <input_bam> : specifies the input file to bed'ize
-d <int> : For PE set max distance between reads
"""
LOGGER.info("Entering bedtools_merge: %s", sample.name)
mappedreads = os.path.join(data.dirs.refmapping,
sample.name + "-mapped-sorted.bam")
## command to call `bedtools bamtobed`, and pipe output to stdout
## Usage: bedtools bamtobed [OPTIONS] -i <bam>
## Usage: bedtools merge [OPTIONS] -i <bam>
cmd1 = [ip.bins.bedtools, "bamtobed", "-i", mappedreads]
cmd2 = [ip.bins.bedtools, "merge", "-i", "-"]
## If PE the -d flag to tell bedtools how far apart to allow mate pairs.
## If SE the -d flag is negative, specifying that SE reads need to
## overlap by at least a specific number of bp. This prevents the
## stairstep syndrome when a + and - read are both extending from
## the same cutsite. Passing a negative number to `merge -d` gets this done.
if 'pair' in data.paramsdict["datatype"]:
check_insert_size(data, sample)
#cmd2.insert(2, str(data._hackersonly["max_inner_mate_distance"]))
cmd2.insert(2, str(data._hackersonly["max_inner_mate_distance"]))
cmd2.insert(2, "-d")
else:
cmd2.insert(2, str(-1 * data._hackersonly["min_SE_refmap_overlap"]))
cmd2.insert(2, "-d")
## pipe output from bamtobed into merge
LOGGER.info("stdv: bedtools merge cmds: %s %s", cmd1, cmd2)
proc1 = sps.Popen(cmd1, stderr=sps.STDOUT, stdout=sps.PIPE)
proc2 = sps.Popen(cmd2, stderr=sps.STDOUT, stdout=sps.PIPE, stdin=proc1.stdout)
result = proc2.communicate()[0].decode()
proc1.stdout.close()
## check for errors and do cleanup
if proc2.returncode:
raise IPyradWarningExit("error in %s: %s", cmd2, result)
## Write the bedfile out, because it's useful sometimes.
if os.path.exists(ip.__debugflag__):
with open(os.path.join(data.dirs.refmapping, sample.name + ".bed"), 'w') as outfile:
outfile.write(result)
## Report the number of regions we're returning
nregions = len(result.strip().split("\n"))
LOGGER.info("bedtools_merge: Got # regions: %s", nregions)
return result
bedtools_merge
<function __main__.bedtools_merge(data, sample)>
def check_insert_size(data, sample):
"""
check mean insert size for this sample and update
hackersonly.max_inner_mate_distance if need be. This value controls how
far apart mate pairs can be to still be considered for bedtools merging
downstream.
"""
## pipe stats output to grep
cmd1 = [
ip.bins.samtools,
"stats",
os.path.join(
data.dirs.refmapping, "{}-mapped-sorted.bam".format(sample.name)),
]
cmd2 = ["grep", "SN"]
proc1 = sps.Popen(cmd1, stderr=sps.STDOUT, stdout=sps.PIPE)
proc2 = sps.Popen(cmd2, stderr=sps.STDOUT, stdout=sps.PIPE, stdin=proc1.stdout)
res = proc2.communicate()[0].decode()
if proc2.returncode:
raise IPyradWarningExit("error in %s: %s", cmd2, res)
## starting vals
avg_insert = 0
stdv_insert = 0
avg_len = 0
## iterate over results
for line in res.split("\n"):
if "insert size average" in line:
avg_insert = float(line.split(":")[-1].strip())
elif "insert size standard deviation" in line:
## hack to fix sim data when stdv is 0.0. Shouldn't
## impact real data bcz stdv gets rounded up below
stdv_insert = float(line.split(":")[-1].strip()) + 0.1
elif "average length" in line:
avg_len = float(line.split(":")[-1].strip())
LOGGER.debug("avg {} stdv {} avg_len {}"
.format(avg_insert, stdv_insert, avg_len))
## If all values return successfully set the max inner mate distance.
## This is tricky. avg_insert is the average length of R1+R2+inner mate
## distance. avg_len is the average length of a read. If there are lots
## of reads that overlap then avg_insert will be close to but bigger than
## avg_len. We are looking for the right value for `bedtools merge -d`
## which wants to know the max distance between reads.
if all([avg_insert, stdv_insert, avg_len]):
## If 2 * the average length of a read is less than the average
## insert size then most reads DO NOT overlap
if stdv_insert < 5:
stdv_insert = 5.
if (2 * avg_len) < avg_insert:
hack = avg_insert + (3 * np.math.ceil(stdv_insert)) - (2 * avg_len)
## If it is > than the average insert size then most reads DO
## overlap, so we have to calculate inner mate distance a little
## differently.
else:
hack = (avg_insert - avg_len) + (3 * np.math.ceil(stdv_insert))
## set the hackerdict value
LOGGER.info("stdv: hacked insert size is %s", hack)
data._hackersonly["max_inner_mate_distance"] = int(np.math.ceil(hack))
else:
## If something fsck then set a relatively conservative distance
data._hackersonly["max_inner_mate_distance"] = 300
LOGGER.debug("inner mate distance for {} - {}".format(sample.name,\
data._hackersonly["max_inner_mate_distance"]))
#from ipyrad.assemble.refmap import *
data = s3.data
samples = list(data.samples.values())
sample = samples[0]
regions = bedtools_merge(data, sample).strip().split("\n")
nregions = len(regions)
chunksize = (nregions / 10) + (nregions % 10)
from pysam import AlignmentFile
sample.files.mapped_reads = os.path.join(
data.dirs.refmapping, sample.name + "-mapped-sorted.bam")
samfile = AlignmentFile(sample.files.mapped_reads, 'rb')
#"./tortas_refmapping/PZ70-mapped-sorted.bam", "rb")
regions[1]
'MT\t10109\t10200'
samfile = pysam.AlignmentFile("ex1.sam", "r")
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) <ipython-input-170-d009e749899f> in <module>() ----> 1 samfile = pysam.AlignmentFile("ex1.sam", "r") pysam/libcalignmentfile.pyx in pysam.libcalignmentfile.AlignmentFile.__cinit__() pysam/libcalignmentfile.pyx in pysam.libcalignmentfile.AlignmentFile._open() FileNotFoundError: [Errno 2] could not open alignment file `ex1.sam`: No such file or directory
a = pysam.AlignmentFile("./3-refsetest_refmapping/1A_0.sam")
nthreads = 2
cmd1 = [
ip.bins.bwa, "mem",
"-t", str(max(1, nthreads)),
"-M",
data.paramsdict['reference_sequence'],
sample.concatfiles[0][0],
sample.concatfiles[0][1] or "",
]
cmd1 = [i for i in cmd1 if i]
# Insert optional flags for bwa
bwa_args = data._hackersonly["bwa_args"].split()
bwa_args.reverse()
for arg in bwa_args:
cmd1.insert(2, arg)
with open(sample.files.sam, 'wb') as outfile:
proc1 = sps.Popen(cmd1, stderr=None, stdout=outfile)
error1 = proc1.communicate()[0]
if proc1.returncode:
raise IPyradError(error1)
def bwa_map(data, sample, nthreads, force):
"""
Map reads to reference sequence. This reads in the fasta files
(samples.files.edits), and maps each read to the reference. Unmapped reads
are dropped right back in the de novo pipeline.
Mapped reads end up in a sam file.
"""
sample.files.sam = os.path.join(
data.dirs.refmapping,
"{}.sam".format(sample.name))
sample.files.mapped_reads = os.path.join(
data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name))
sample.files.unmapped_bam = os.path.join(
data.dirs.refmapping,
"{}-unmapped.bam".format(sample.name))
sample.files.unmapped_reads = os.path.join(
data.dirs.refmapping,
"{}-unmapped.fastq".format(sample.name))
# (cmd1) bwa mem [OPTIONS] <index_name> <file_name_A> [<file_name_B>]
# -t # : Number of threads
# -M : Mark split alignments as secondary.
# (cmd2) samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]
# -b = write to .bam
# -q = Only keep reads with mapq score >= 30 (seems to be pretty standard)
# -F = Select all reads that DON'T have these flags.
# 0x4 (segment unmapped)
# 0x100 (Secondary alignment)
# 0x800 (supplementary alignment)
# -U = Write out all reads that don't pass the -F filter
# (all unmapped reads go to this file).
# (cmd3) samtools sort [options...] [in.bam]
# -T = Temporary file name, this is required by samtools, ignore it
# Here we hack it to be samhandle.tmp cuz samtools cleans it up
# -O = Output file format, in this case bam
# -o = Output file name
# (cmd5) samtools bam2fq -v 45 [in.bam]
# -v45 set the default qscore arbirtrarily high
#
cmd1 = [
ip.bins.bwa, "mem",
"-t", str(max(1, nthreads)),
"-M",
data.paramsdict['reference_sequence'],
sample.concatfiles[0][0],
sample.concatfiles[0][1] or "",
]
cmd1 = [i for i in cmd1 if i]
# Insert optional flags for bwa
bwa_args = data._hackersonly["bwa_args"].split()
bwa_args.reverse()
for arg in bwa_args:
cmd1.insert(2, arg)
with open(sample.files.sam, 'wb') as outfile:
proc1 = sps.Popen(cmd1, stderr=None, stdout=outfile)
error1 = proc1.communicate()[0]
if proc1.returncode:
raise IPyradError(error1)
# sends unmapped reads to a files and will PIPE mapped reads to cmd3
cmd2 = [
ip.bins.samtools, "view",
"-b",
"-F", "0x904",
"-U", sample.files.unmapped_bam,
sample.files.sam,
]
# this is gonna catch mapped bam output from cmd2 and write to file
cmd3 = [
ip.bins.samtools, "sort",
"-T", os.path.join(data.dirs.refmapping, sample.name + ".sam.tmp"),
"-O", "bam",
"-o", sample.files.mapped_reads]
# Later we're gonna use samtools to grab out regions using 'view'
cmd4 = [ipyrad.bins.samtools, "index", sample.files.mapped_reads]
# convert unmapped reads to fastq
cmd5 = [
ip.bins.samtools, "bam2fq",
"-v 45",
sample.files.unmapped_bam,
]
# Insert additional arguments for paired data to the commands.
# We assume Illumina paired end reads for the orientation
# of mate pairs (orientation: ---> <----).
if 'pair' in data.paramsdict["datatype"]:
# add samtools filter for only keep if both pairs hit
# 0x1 - Read is paired
# 0x2 - Each read properly aligned
cmd2.insert(2, "0x3")
cmd2.insert(2, "-f")
# tell bam2fq that there are output files for each read pair
cmd5.insert(2, os.path.join(
data.dirs.edits, sample.name + "-tmp-umap1.fastq"))
cmd5.insert(2, "-1")
cmd5.insert(2, os.path.join(
data.dirs.edits, sample.name + "-tmp-umap2.fastq"))
cmd5.insert(2, "-2")
else:
cmd5.insert(2, sample.files.unmapped_reads)
cmd5.insert(2, "-0")
# cmd2 writes to sname.unmapped.bam and fills pipe with mapped BAM data
LOGGER.debug(" ".join(cmd2))
proc2 = sps.Popen(cmd2, stderr=sps.STDOUT, stdout=sps.PIPE)
# cmd3 pulls mapped BAM from pipe and writes to sname.mapped-sorted.bam
LOGGER.debug(" ".join(cmd3))
proc3 = sps.Popen(cmd3,
stderr=sps.STDOUT, stdout=sps.PIPE, stdin=proc2.stdout)
error3 = proc3.communicate()[0]
if proc3.returncode:
raise IPyradWarningExit(error3)
proc2.stdout.close()
# cmd4 indexes the bam file
LOGGER.debug(" ".join(cmd4))
proc4 = sps.Popen(cmd4, stderr=sps.STDOUT, stdout=sps.PIPE)
error4 = proc4.communicate()[0]
if proc4.returncode:
raise IPyradWarningExit(error4)
# Running cmd5 writes to either edits/sname-refmap_derep.fastq for SE
# or it makes edits/sname-tmp-umap{12}.fastq for paired data, which
# will then need to be merged.
LOGGER.debug(" ".join(cmd5))
proc5 = sps.Popen(cmd5, stderr=sps.STDOUT, stdout=sps.PIPE)
error5 = proc5.communicate()[0]
if proc5.returncode:
raise IPyradWarningExit(error5)
bwa_map(data, sample, 2, 1)
cmd1 = [
ip.bins.bwa, "mem",
"-t", str(max(1, 2)),
"-M",
data.paramsdict['reference_sequence'],
sample.concatfiles[0][0],
sample.concatfiles[0][1] or "",
]
proc1 = sps.Popen(cmd1)#, stderr=cmd1_stderr, stdout=cmd1_stdout)
proc1.communicate()
(None, None)
pysam.sort("-m", "1000000", "-o", "output.bam", "ex1.bam")
iterreg = samfile.fetch("MT", 5009, 5100)
iterreg = samfile.fetch("MT", 10109, 10200)
iterreg = samfile.fetch("MT", 285510, 285600)
for read in iterreg:
print(read.seq)
print(read.compare(read.get_reference_sequence()))
TGCAGACCATAATGGCATGGTGCCACAGCTAGGGACCGATCTCTTTCTAAACTACATCCCCTAGGTTGAGACCCATGCGTGCTCTAATTGG
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-181-b00faa645bf8> in <module>() 1 for read in iterreg: 2 print(read.seq) ----> 3 print(read.compare(read.get_reference_sequence())) TypeError: Argument 'other' has incorrect type (expected pysam.libcalignedsegment.AlignedSegment, got str)
it = samfile.pileup("MT", 5009, 5100)
pysam.Pileup.
<pysam.libcalignmentfile.IteratorColumnRegion at 0x7fc1fa046558>
for read in iterreg:
print(read.aligned_pairs)
[(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)] [(0, 10109), (1, 10110), (2, 10111), (3, 10112), (4, 10113), (5, 10114), (6, 10115), (7, 10116), (8, 10117), (9, 10118), (10, 10119), (11, 10120), (12, 10121), (13, 10122), (14, 10123), (15, 10124), (16, 10125), (17, 10126), (18, 10127), (19, 10128), (20, 10129), (21, 10130), (22, 10131), (23, 10132), (24, 10133), (25, 10134), (26, 10135), (27, 10136), (28, 10137), (29, 10138), (30, 10139), (31, 10140), (32, 10141), (33, 10142), (34, 10143), (35, 10144), (36, 10145), (37, 10146), (38, 10147), (39, 10148), (40, 10149), (41, 10150), (42, 10151), (43, 10152), (44, 10153), (45, 10154), (46, 10155), (47, 10156), (48, 10157), (49, 10158), (50, 10159), (51, 10160), (52, 10161), (53, 10162), (54, 10163), (55, 10164), (56, 10165), (57, 10166), (58, 10167), (59, 10168), (60, 10169), (61, 10170), (62, 10171), (63, 10172), (64, 10173), (65, 10174), (66, 10175), (67, 10176), (68, 10177), (69, 10178), (70, 10179), (71, 10180), (72, 10181), (73, 10182), (74, 10183), (75, 10184), (76, 10185), (77, 10186), (78, 10187), (79, 10188), (80, 10189), (81, 10190), (82, 10191), (83, 10192), (84, 10193), (85, 10194), (86, 10195), (87, 10196), (88, 10197), (89, 10198), (90, 10199)]
rdict = {}
for read in iterreg:
if read.qname not in rdict:
rdict[read.qname] = read
read.seq
'TGCAGTTTAACTGTTCAAGTTGGCAAGATCAAGTCGTCCCTAGCCCCCGCGTCCGTTTTTACCTGGTCGCGGTCCCGACCCAGCTGCCCCC'
read.cigar
[(0, 91)]
sorted(
rdict.keys(),
key=lambda x: int(x.split(""))
dict_keys(['lane1_locus0_1B_0_0', 'lane1_locus0_1B_0_1', 'lane1_locus0_1B_0_2', 'lane1_locus0_1B_0_3', 'lane1_locus0_1B_0_4', 'lane1_locus0_1B_0_5', 'lane1_locus0_1B_0_6', 'lane1_locus0_1B_0_7', 'lane1_locus0_1B_0_8', 'lane1_locus0_1B_0_9', 'lane1_locus0_1B_0_10', 'lane1_locus0_1B_0_11', 'lane1_locus0_1B_0_12', 'lane1_locus0_1B_0_13', 'lane1_locus0_1B_0_14', 'lane1_locus0_1B_0_15', 'lane1_locus0_1B_0_16', 'lane1_locus0_1B_0_17', 'lane1_locus0_1B_0_18', 'lane1_locus0_1B_0_19', 'lane1_locus0_1B_0_20'])
it = samfile.pileup('MT', 5009, 5100)
clusts = []
nclusts = 0
for region in regions:
chrom, pos1, pos2 = region.split()
clust = fetch_cluster_se(data, samfile, chrom, int(pos1), int(pos2))
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-71-06a117ea708d> in <module>() 4 chrom, pos1, pos2 = region.split() 5 ----> 6 clust = fetch_cluster_se(data, samfile, chrom, int(pos1), int(pos2)) ~/Documents/ipyrad/ipyrad/assemble/refmap.py in fetch_cluster_se(data, samfile, chrom, rstart, rend) 368 ## sort dict keys so highest derep is first ('seed') 369 sfunc = lambda x: int(x.split(";size=")[1].split(";")[0]) --> 370 rkeys = sorted(rdict.keys(), key=sfunc, reverse=True) 371 372 ## get blocks from the seed for filtering, bail out if seed is not paired ~/Documents/ipyrad/ipyrad/assemble/refmap.py in <lambda>(x) 367 368 ## sort dict keys so highest derep is first ('seed') --> 369 sfunc = lambda x: int(x.split(";size=")[1].split(";")[0]) 370 rkeys = sorted(rdict.keys(), key=sfunc, reverse=True) 371 IndexError: list index out of range
def fetch_cluster_se(data, samfile, chrom, rstart, rend):
"""
Builds a single end cluster from the refmapped data.
"""
## If SE then we enforce the minimum overlap distance to avoid the
## staircase syndrome of multiple reads overlapping just a little.
overlap_buffer = data._hackersonly["min_SE_refmap_overlap"]
## the *_buff variables here are because we have to play patty
## cake here with the rstart/rend vals because we want pysam to
## enforce the buffer for SE, but we want the reference sequence
## start and end positions to print correctly for downstream.
rstart_buff = rstart + overlap_buffer
rend_buff = rend - overlap_buffer
## Reads that map to only very short segements of the reference
## sequence will return buffer end values that are before the
## start values causing pysam to complain. Very short mappings.
if rstart_buff > rend_buff:
tmp = rstart_buff
rstart_buff = rend_buff
rend_buff = tmp
## Buffering can't make start and end equal or pysam returns nothing.
if rstart_buff == rend_buff:
rend_buff += 1
## store pairs
rdict = {}
clust = []
iterreg = []
iterreg = samfile.fetch(chrom, rstart_buff, rend_buff)
## use dict to match up read pairs
for read in iterreg:
if read.qname not in rdict:
rdict[read.qname] = read
## sort dict keys so highest derep is first ('seed')
sfunc = lambda x: int(x.split(";size=")[1].split(";")[0])
rkeys = sorted(rdict.keys(), key=sfunc, reverse=True)
## get blocks from the seed for filtering, bail out if seed is not paired
try:
read1 = rdict[rkeys[0]]
except ValueError:
LOGGER.error("Found bad cluster, skipping - key:{} rdict:{}".format(rkeys[0], rdict))
return ""
## the starting blocks for the seed
poss = read1.get_reference_positions(full_length=True)
seed_r1start = min(poss)
seed_r1end = max(poss)
## store the seed -------------------------------------------
if read1.is_reverse:
seq = revcomp(read1.seq)
else:
seq = read1.seq
## store, could write orient but just + for now.
size = sfunc(rkeys[0])
clust.append(">{}:{}:{};size={};*\n{}"\
.format(chrom, seed_r1start, seed_r1end, size, seq))
## If there's only one hit in this region then rkeys will only have
## one element and the call to `rkeys[1:]` will raise. Test for this.
if len(rkeys) > 1:
## store the hits to the seed -------------------------------
for key in rkeys[1:]:
skip = False
try:
read1 = rdict[key]
except ValueError:
## enter values that will make this read get skipped
read1 = rdict[key][0]
skip = True
## orient reads only if not skipping
if not skip:
poss = read1.get_reference_positions(full_length=True)
minpos = min(poss)
maxpos = max(poss)
## store the seq
if read1.is_reverse:
seq = revcomp(read1.seq)
else:
seq = read1.seq
## store, could write orient but just + for now.
size = sfunc(key)
clust.append(">{}:{}:{};size={};+\n{}"\
.format(chrom, minpos, maxpos, size, seq))
else:
## seq is excluded, though, we could save it and return
## it as a separate cluster that will be aligned separately.
pass
return clust
cmd1 = [
ip.bins.bwa, "mem",
"-t", str(max(1, nthreads)),
"-M",
data.paramsdict['reference_sequence'],
]
cmd1 += [i for i in sample.concatfiles[0] if i]
# Insert optional flags for bwa
bwa_args = data._hackersonly["bwa_args"].split()
bwa_args.reverse()
for arg in bwa_args:
cmd1.insert(2, arg)
cmd1_stdout_handle = os.path.join(
data.dirs.refmapping, sample.name + ".sam")
cmd1_stdout = open(cmd1_stdout_handle, 'w')
cmd1_stderr = None
proc1 = sps.Popen(cmd1, stderr=cmd1_stderr, stdout=cmd1_stdout)
error1 = proc1.communicate()[0]
cmd2 = [
ip.bins.samtools, "view",
"-b",
#"-q", "30",
"-F", "0x904",
"-U", os.path.join(
data.dirs.refmapping, sample.name + "-unmapped.bam"),
os.path.join(data.dirs.refmapping, sample.name + ".sam")]
proc2 = sps.Popen(cmd2, stderr=sps.STDOUT, stdout=sps.PIPE)
#proc2.communicate()
proc3 = sps.Popen(cmd3, stderr=sps.STDOUT, stdout=sps.PIPE, stdin=proc2.stdout)
proc3.communicate()
(b'', None)
proc4 = sps.Popen(cmd4, stderr=sps.STDOUT, stdout=sps.PIPE)
proc4.communicate()
(b'', None)
sample.files.unmapped_reads = os.path.join(
data.dirs.refmapping,
"{}-unmapped.fastq".format(sample.name))
cmd4 = [ip.bins.samtools, "index", sample.files.mapped_reads]
# this is gonna read in the unmapped files, args are added below,
# and it will output fastq formatted unmapped reads for merging.
# -v 45 sets the default qscore arbitrarily high
cmd5 = [
ip.bins.samtools, "bam2fq",
"-v 45",
os.path.join(data.dirs.refmapping, sample.name + "-unmapped.bam")]
# Insert additional arguments for paired data to the commands.
# We assume Illumina paired end reads for the orientation
# of mate pairs (orientation: ---> <----).
if 'pair' in data.paramsdict["datatype"]:
# add samtools filter for only keep if both pairs hit
# 0x1 - Read is paired
# 0x2 - Each read properly aligned
cmd2.insert(2, "0x3")
cmd2.insert(2, "-f")
# tell bam2fq that there are output files for each read pair
cmd5.insert(2, os.path.join(data.dirs.edits, sample.name + "-tmp-umap1.fastq"))
cmd5.insert(2, "-1")
cmd5.insert(2, os.path.join(data.dirs.edits, sample.name + "-tmp-umap2.fastq"))
cmd5.insert(2, "-2")
else:
cmd5.insert(2, sample.files.unmapped_reads)
cmd5.insert(2, "-0")
cmd5
['/home/deren/Documents/ipyrad/bin/samtools-linux-x86_64', 'bam2fq', '-0', '/home/deren/Documents/ipyrad/tests/3-refsetest_refmapping/3K_0-unmapped.fastq', '-v 45', '/home/deren/Documents/ipyrad/tests/3-refsetest_refmapping/3K_0-unmapped.bam']
proc5 = sps.Popen(cmd5, stderr=sps.STDOUT, stdout=sps.PIPE)
error5 = proc5.communicate()[0]
sample.files.mapped_reads = os.path.join(
data.dirs.refmapping,
"{}-mapped-sorted.bam".format(sample.name))
# this is gonna catch mapped bam output from cmd2 and write to file
cmd3 = [
ip.bins.samtools, "sort",
"-T", os.path.join(data.dirs.refmapping, sample.name + ".sam.tmp"),
"-O", "bam",
"-o", sample.files.mapped_reads]
cmd3
['/home/deren/Documents/ipyrad/bin/samtools-linux-x86_64', 'sort', '-T', '/home/deren/Documents/ipyrad/tests/3-refsetest_refmapping/3K_0.sam.tmp', '-O', 'bam', '-o', '/home/deren/Documents/ipyrad/tests/3-refsetest_refmapping/3K_0-mapped-sorted.bam']
data.dirs.refmapping
''
nthreads = 2
cmd1 = [
ip.bins.bwa, "mem",
"-t", str(max(1, nthreads)),
"-M",
data.paramsdict['reference_sequence']
]
cmd1
#cmd1 += sample.files.dereps
['/home/deren/Documents/ipyrad/bin/bwa-linux-x86_64', 'mem', '-t', '2', '-M', '/home/deren/Documents/ipyrad/tests/ipsimdata/rad_example_genome.fa']
s3 = Step3(data, list(data.samples.values()), 0, 5, True, ipyclient)
sample = s3.samples[0]
s3.run()
[####################] 100% 0:00:01 | dereplicating | s3 | [####################] 100% 0:00:01 | clustering/mapping | s3 | [####################] 100% 0:00:00 | building clusters | s3 | [####################] 100% 0:00:00 | chunking clusters | s3 | [####################] 100% 0:00:15 | aligning clusters | s3 | [####################] 100% 0:00:00 | concat clusters | s3 |
#derep_sort_map(s3.data, sample, s3.force, s3.nthreads)
sample.concatfiles = concat_multiple_edits(s3.data, sample)
sample.mergedfile = merge_pairs(s3.data, sample, 1, 1)
sample.mergedfile
[('/home/deren/Documents/ipyrad/tests/2-setest_edits/3K_0.trimmed_R1_.fastq.gz', 0)]
new_derep_and_sort(s3.data, sample.mergedfile, os.path.join(s3.data.tmpdir, sample.name+ "_derep.fastq"), s3.nthreads)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-12-300134f582b0> in <module>() ----> 1 new_derep_and_sort(s3.data, sample.mergedfile, os.path.join(s3.data.tmpdir, sample.name+ "_derep.fastq"), s3.nthreads) ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in new_derep_and_sort(data, infile, outfile, nthreads) 427 428 ## build PIPEd job --> 429 proc = sps.Popen(cmd, stderr=sps.STDOUT, stdout=sps.PIPE, close_fds=True) 430 errmsg = proc.communicate()[0] 431 if proc.returncode: ~/miniconda3/lib/python3.6/subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors) 707 c2pread, c2pwrite, 708 errread, errwrite, --> 709 restore_signals, start_new_session) 710 except: 711 # Cleanup if the child failed starting. ~/miniconda3/lib/python3.6/subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session) 1273 errread, errwrite, 1274 errpipe_read, errpipe_write, -> 1275 restore_signals, start_new_session, preexec_fn) 1276 self._child_created = True 1277 finally: TypeError: expected str, bytes or os.PathLike object, not list
strand = "plus"
if s3.data.paramsdict["datatype"] is ('gbs' or '2brad'):
strand = "both"
cmd = [
ip.bins.vsearch,
"--derep_fulllength", sample.mergedfile,
"--strand", strand,
"--output", outfile,
"--threads", str(nthreads),
"--fasta_width", str(0),
"--fastq_qmax", "1000",
"--sizeout",
"--relabel_md5",
]
s3.data.run("4")
Assembly: pairtest [####################] 100% 0:00:00 | inferring [H, E] | s4 | Encountered an unexpected error (see ./ipyrad_log.txt) Error message is below ------------------------------- The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
s3.remote_run_dereps()
#s3.remote_run_cluster_map_build()
[####################] 100% 0:00:01 | dereplicating | s3 |
for sample in s3.samples:
cluster(s3.data, sample, s3.nthreads, s3.force)
for sample in s3.samples:
build_clusters(s3.data, sample, s3.maxindels)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-8-dad8bccabeaa> in <module>() 1 for sample in s3.samples: ----> 2 build_clusters(s3.data, sample, s3.maxindels) ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in build_clusters(data, sample, maxindels) 872 key=lambda x: 873 int(x.split(";size=")[-1].split("\n")[:-1]), --> 874 reverse=True) 875 seqlist.append("\n".join(fseqs)) 876 seqsize += 1 ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in <lambda>(x) 871 fseqs = [fseqs[0]] + sorted(fseqs[1:], 872 key=lambda x: --> 873 int(x.split(";size=")[-1].split("\n")[:-1]), 874 reverse=True) 875 seqlist.append("\n".join(fseqs)) TypeError: int() argument must be a string, a bytes-like object or a number, not 'list'
for sample in s3.samples:
muscle_chunker(s3.data, sample)
aasyncs = {}
for sample in s3.samples:
aasyncs[sample.name] = []
for idx in range(10):
handle = os.path.join(s3.data.tmpdir,
"{}_chunk_{}.ali".format(sample.name, idx))
align_and_parse(handle, s3.maxindels, s3.gbs)
def _get_derep_num(read):
"return the number of replicates in a derep read"
return int(read.split("=")[-1].split("\n")[0][:-1])
def _aligned_indel_filter(clust, max_internal_indels):
""" checks for too many internal indels in muscle aligned clusters """
## make into list
lclust = clust.split()
## paired or not
try:
seq1 = [i.split("nnnn")[0] for i in lclust[1::2]]
seq2 = [i.split("nnnn")[1] for i in lclust[1::2]]
intindels1 = [i.rstrip("-").lstrip("-").count("-") for i in seq1]
intindels2 = [i.rstrip("-").lstrip("-").count("-") for i in seq2]
intindels = intindels1 + intindels2
if max(intindels) > max_internal_indels:
return 1
except IndexError:
seq1 = lclust[1::2]
intindels = [i.rstrip("-").lstrip("-").count("-") for i in seq1]
if max(intindels) > max_internal_indels:
return 1
return 0
for sample in s3.samples:
reconcat(s3.data, sample)
def align_and_parse(handle, max_internal_indels=5, is_gbs=False):
""" much faster implementation for aligning chunks """
## data are already chunked, read in the whole thing. bail if no data.
try:
with open(handle, 'rb') as infile:
clusts = infile.read().decode().split("//\n//\n")
# remove any empty spots
clusts = [i for i in clusts if i]
# Skip entirely empty chunks
if not clusts:
raise IPyradError("no clusters in file: {}".format(handle))
except (IOError, IPyradError):
LOGGER.debug("skipping empty chunk - {}".format(handle))
return 0
## count discarded clusters for printing to stats later
highindels = 0
## iterate over clusters sending each to muscle, splits and aligns pairs
aligned = _persistent_popen_align3(clusts, 200, is_gbs)
## store good alignments to be written to file
refined = []
## filter and trim alignments
for clust in aligned:
# check for too many internal indels
if not _aligned_indel_filter(clust, max_internal_indels):
refined.append(clust)
else:
highindels += 1
## write to file after
if refined:
outhandle = handle.rsplit(".", 1)[0] + ".aligned"
with open(outhandle, 'wb') as outfile:
outfile.write(str.encode("\n//\n//\n".join(refined) + "\n"))
## remove the old tmp file
if not LOGGER.getEffectiveLevel() == 10:
os.remove(handle)
return highindels
def _persistent_popen_align3(clusts, maxseqs=200, is_gbs=False):
""" keeps a persistent bash shell open and feeds it muscle alignments """
## create a separate shell for running muscle in, this is much faster
## than spawning a separate subprocess for each muscle call
proc = sps.Popen(
["bash"],
stdin=sps.PIPE,
stdout=sps.PIPE,
bufsize=0,
)
## iterate over clusters in this file until finished
aligned = []
for clust in clusts:
## new alignment string for read1s and read2s
align1 = []
align2 = []
## don't bother aligning if only one seq
if clust.count(">") == 1:
aligned.append(clust.replace(">", "").strip())
else:
# do we need to split the alignment? (is there a PE insert?)
try:
# make into list (only read maxseqs lines, 2X cuz names)
lclust = clust.split()[:maxseqs * 2]
# try to split cluster list at nnnn separator for each read
lclust1 = list(chain(*zip(
lclust[::2], [i.split("nnnn")[0] for i in lclust[1::2]])))
lclust2 = list(chain(*zip(
lclust[::2], [i.split("nnnn")[1] for i in lclust[1::2]])))
# put back into strings
clust1 = "\n".join(lclust1)
clust2 = "\n".join(lclust2)
# Align the first reads.
# The muscle command with alignment as stdin and // as split
cmd1 = ("echo -e '{}' | {} -quiet -in - ; echo {}"
.format(clust1, ip.bins.muscle, "//\n"))
# send cmd1 to the bash shell
proc.stdin.write(cmd1.encode())
# read the stdout by line until splitter is reached
# meaning that the alignment is finished.
for line in iter(proc.stdout.readline, b'//\n'):
align1.append(line.decode())
# Align the second reads.
# The muscle command with alignment as stdin and // as split
cmd2 = ("echo -e '{}' | {} -quiet -in - ; echo {}"
.format(clust2, ip.bins.muscle, "//\n"))
# send cmd2 to the bash shell
proc.stdin.write(cmd2.encode())
# read the stdout by line until splitter is reached
# meaning that the alignment is finished.
for line in iter(proc.stdout.readline, b'//\n'):
align2.append(line.decode())
# join up aligned read1 and read2 and ensure names order match
lines1 = "".join(align1)[1:].split("\n>")
lines2 = "".join(align2)[1:].split("\n>")
dalign1 = dict([i.split("\n", 1) for i in lines1])
dalign2 = dict([i.split("\n", 1) for i in lines2])
# sort the first reads
keys = list(dalign1.keys())
seed = [i for i in keys if i[-1] == "*"][0]
keys.pop(keys.index(seed))
order = [seed] + sorted(
keys, key=_get_derep_num, reverse=True)
# combine in order
alignpe = []
for key in order:
alignpe.append("\n".join([
key,
dalign1[key].replace("\n", "") + "nnnn" + \
dalign2[key].replace("\n", "")]))
## append aligned cluster string
aligned.append("\n".join(alignpe).strip())
# Malformed clust. Dictionary creation with only 1 element
except ValueError as inst:
ip.logger.debug(
"Bad PE cluster - {}\nla1 - {}\nla2 - {}"
.format(clust, lines1, lines2))
## Either reads are SE, or at least some pairs are merged.
except IndexError:
# limit the number of input seqs
# use lclust already built before checking pairs
lclust = "\n".join(clust.split()[:maxseqs * 2])
# the muscle command with alignment as stdin and // as splitter
cmd = ("echo -e '{}' | {} -quiet -in - ; echo {}"
.format(lclust, ip.bins.muscle, "//\n"))
## send cmd to the bash shell (TODO: PIPE could overflow here!)
proc.stdin.write(cmd.encode())
## read the stdout by line until // is reached. This BLOCKS.
for line in iter(proc.stdout.readline, b'//\n'):
align1.append(line.decode())
## remove '>' from names, and '\n' from inside long seqs
lines = "".join(align1)[1:].split("\n>")
## find seed of the cluster and put it on top.
seed = [i for i in lines if i.split(";")[-1][0] == "*"][0]
lines.pop(lines.index(seed))
lines = [seed] + sorted(
lines, key=_get_derep_num, reverse=True)
## format remove extra newlines from muscle
aa = [i.split("\n", 1) for i in lines]
align1 = [i[0] + '\n' + "".join([j.replace("\n", "")
for j in i[1:]]) for i in aa]
# trim edges in sloppy gbs/ezrad data.
# Maybe relevant to other types too...
if is_gbs:
align1 = _gbs_trim(align1)
## append to aligned
aligned.append("\n".join(align1))
# cleanup
proc.stdout.close()
if proc.stderr:
proc.stderr.close()
proc.stdin.close()
proc.wait()
## return the aligned clusters
return aligned
for sample in s3.samples:
ip.assemble.clustmap._sample_cleanup(s3.data, sample)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-35-a8a5a2cf58f8> in <module>() 1 for sample in s3.samples: ----> 2 ip.assemble.clustmap._sample_cleanup(s3.data, sample) ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in _sample_cleanup(data, sample) 1262 1263 # get maxlen and depths array from clusters -> 1264 maxlens, depths = _get_quick_depths(data, sample) 1265 1266 try: ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in _get_quick_depths(data, sample) 1250 1251 else: -> 1252 tdepth += int(name.split(";")[-2][5:]) 1253 tlen = len(seq) 1254 ValueError: invalid literal for int() with base 10: 'f72cb70f788295d6632d5d18442'
ip.assemble.clustmap._get_quick_depths(s3.data, sample)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-36-93c29eef65ac> in <module>() ----> 1 ip.assemble.clustmap._get_quick_depths(s3.data, sample) ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in _get_quick_depths(data, sample) 1250 1251 else: -> 1252 tdepth += int(name.split(";")[-2][5:]) 1253 tlen = len(seq) 1254 ValueError: invalid literal for int() with base 10: 'f72cb70f788295d6632d5d18442'
if sample.files.get('clusters'):
pass
fclust = data.samples[sample.name].files.clusters
clusters = gzip.open(fclust, 'rt')
pairdealer = izip(*[iter(clusters)] * 2)
## storage
depths = []
maxlen = []
## start with cluster 0
tdepth = 0
tlen = 0
## iterate until empty
while 1:
## grab next
try:
name, seq = next(pairdealer)
except StopIteration:
break
## if not the end of a cluster
#print name.strip(), seq.strip()
#print(name)
if name.strip() == seq.strip():
depths.append(tdepth)
maxlen.append(tlen)
tlen = 0
tdepth = 0
else:
tdepth += int(name.strip().split("=")[-1][:-1])
tlen = len(seq)
sample.name
'1D_0'
s3.remote_run_dereps()
[####################] 100% 0:00:02 | dereplicating | s3 |
s3.remote_run_cluster_map_build()
[####################] 100% 0:00:05 | clustering/mapping | s3 | [####################] 100% 0:00:00 | building clusters | s3 | [####################] 100% 0:00:00 | chunking clusters | s3 |
s3.remote_run_align_cleanup()
[####################] 100% 0:00:02 | aligning clusters | s3 |
--------------------------------------------------------------------------- IPyradError Traceback (most recent call last) <ipython-input-8-2f667c287daa> in <module>() ----> 1 s3.remote_run_align_cleanup() ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in remote_run_align_cleanup(self) 333 for job in allasyncs: 334 if not job.successful(): --> 335 raise IPyradError(job.exception()) 336 337 # track job progress IPyradError: TypeError(a bytes-like object is required, not 'str')
handle = "pairtest-tmpalign/1A_0_chunk_0.ali"
with open(handle, 'rb') as infile:
clusts = infile.read().decode().split("//\n//\n")
# remove any empty spots
clusts = [i for i in clusts if i]
# Skip entirely empty chunks
if not clusts:
raise IPyradError("no clusters in file: {}".format(handle))
maxseqs = 200
is_gbs = False
proc = sps.Popen(
["bash"],
stdin=sps.PIPE,
stdout=sps.PIPE,
bufsize=0,
)
## iterate over clusters in this file until finished
aligned = []
for clust in clusts:
## new alignment string for read1s and read2s
align1 = []
align2 = []
## don't bother aligning if only one seq
if clust.count(">") == 1:
aligned.append(clust.replace(">", "").strip())
else:
# do we need to split the alignment? (is there a PE insert?)
try:
# make into list (only read maxseqs lines, 2X cuz names)
lclust = clust.split()[:maxseqs * 2]
# try to split cluster list at nnnn separator for each read
lclust1 = list(chain(*zip(
lclust[::2], [i.split("nnnn")[0] for i in lclust[1::2]])))
lclust2 = list(chain(*zip(
lclust[::2], [i.split("nnnn")[1] for i in lclust[1::2]])))
# put back into strings
clust1 = "\n".join(lclust1)
clust2 = "\n".join(lclust2)
# Align the first reads.
# The muscle command with alignment as stdin and // as split
cmd1 = ("echo -e '{}' | {} -quiet -in - ; echo {}"
.format(clust1, ip.bins.muscle, "//\n"))
# send cmd1 to the bash shell
proc.stdin.write(cmd1.encode())
# read the stdout by line until splitter is reached
# meaning that the alignment is finished.
for line in iter(proc.stdout.readline, '//\n'):
align1.append(line)
# Align the second reads.
# The muscle command with alignment as stdin and // as split
cmd2 = ("echo -e '{}' | {} -quiet -in - ; echo {}"
.format(clust2, ip.bins.muscle, "//\n"))
# send cmd2 to the bash shell
proc.stdin.write(cmd2.encode())
# read the stdout by line until splitter is reached
# meaning that the alignment is finished.
for line in iter(proc.stdout.readline, b'//\n'):
align2.append(line)
# join up aligned read1 and read2 and ensure names order match
lines1 = "".join(align1)[1:].split("\n>")
lines2 = "".join(align2)[1:].split("\n>")
dalign1 = dict([i.split("\n", 1) for i in lines1])
dalign2 = dict([i.split("\n", 1) for i in lines2])
# sort the first reads
keys = list(dalign1.keys())
seed = [i for i in keys if i[-1] == "*"][0]
keys.pop(keys.index(seed))
order = [seed] + sorted(
keys, key=_get_derep_num, reverse=True)
# combine in order
for key in order:
align1.append("\n".join([
key,
dalign1[key].replace("\n", "") + "nnnn" + \
dalign2[key].replace("\n", "")]))
## append aligned cluster string
aligned.append("\n".join(align1).strip())
# Malformed clust. Dictionary creation with only 1 element
except ValueError as inst:
ip.logger.debug(
"Bad PE cluster - {}\nla1 - {}\nla2 - {}"
.format(clust, lines1, lines2))
## Either reads are SE, or at least some pairs are merged.
except IndexError:
# limit the number of input seqs
# use lclust already built before checking pairs
lclust = "\n".join(clust.split()[:maxseqs * 2])
# the muscle command with alignment as stdin and // as splitter
cmd = ("echo -e '{}' | {} -quiet -in - ; echo {}"
.format(lclust, ip.bins.muscle, "//\n"))
## send cmd to the bash shell (TODO: PIPE could overflow here!)
proc.stdin.write(cmd.encode())
## read the stdout by line until // is reached. This BLOCKS.
for line in iter(proc.stdout.readline, b'//\n'):
align1.append(line.decode())
## remove '>' from names, and '\n' from inside long seqs
lines = "".join(align1)[1:].split("\n>")
## find seed of the cluster and put it on top.
seed = [i for i in lines if i.split(";")[-1][0] == "*"][0]
lines.pop(lines.index(seed))
lines = [seed] + sorted(
lines, key=_get_derep_num, reverse=True)
## format remove extra newlines from muscle
aa = [i.split("\n", 1) for i in lines]
align1 = [i[0] + '\n' + "".join([j.replace("\n", "")
for j in i[1:]]) for i in aa]
# trim edges in sloppy gbs/ezrad data.
# Maybe relevant to other types too...
if is_gbs:
align1 = _gbs_trim(align1)
## append to aligned
aligned.append("\n".join(align1))
# cleanup
proc.stdout.close()
if proc.stderr:
proc.stderr.close()
proc.stdin.close()
proc.wait()
## return the aligned clusters
#return aligned
from ipyrad.assemble.clustmap import _get_derep_num
# join up aligned read1 and read2 and ensure names order match
lines1 = "".join(align1)[1:].split("\n>")
lines2 = "".join(align2)[1:].split("\n>")
dalign1 = dict([i.split("\n", 1) for i in lines1])
dalign2 = dict([i.split("\n", 1) for i in lines2])
# sort the first reads
keys = list(dalign1.keys())
seed = [i for i in keys if i[-1] == "*"][0]
keys.pop(keys.index(seed))
order = [seed] + sorted(
keys, key=_get_derep_num, reverse=True)
# join up aligned read1 and read2 and ensure names order match
lines1 = "".join(align1)[1:].split("\n>")
lines2 = "".join(align2)[1:].split("\n>")
dalign1 = dict([i.split("\n", 1) for i in lines1])
dalign2 = dict([i.split("\n", 1) for i in lines2])
# sort the first reads
keys = list(dalign1.keys())
seed = [i for i in keys if i[-1] == "*"][0]
keys.pop(keys.index(seed))
order = [seed] + sorted(
keys, key=_get_derep_num, reverse=True)
# combine in order
for key in order:
align1.append("\n".join([
key,
dalign1[key].replace("\n", "") + "nnnn" + \
dalign2[key].replace("\n", "")]))
## append aligned cluster string
aligned.append("\n".join(align1).strip())
aligned
['>12f1ffdaa3a3d7c8310998dea05a56dd;size=22;*\n\nTGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGT\n\nTATATACAAAGACGGATGTGGTGCGAAATAC\n\n>1f4df54209f2d8d9be6019fde95d7579;size=1;+\n\nTGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGT\n\nTATATACAAAGACGGATGTGGTGCGAAATAC\n\n>4b85ee58466075729a68837e2b016ad7;size=1;+\n\nTGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGT\n\nTATATACAAAGACGGATGTGGTGCGAAATAC\n\n12f1ffdaa3a3d7c8310998dea05a56dd;size=22;*\nTGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGTTATATACAAAGACGGATGTGGTGCGAAATACnnnnGGCTCCTATTTCAAGTACCGTCTAATGTCAATAAGATGGTTTCGATGCGTGGAGAGAAACCCACTCTGAACGTCCCGATCACAGCGTTGGCTCTACTCCG\n1f4df54209f2d8d9be6019fde95d7579;size=1;+\nTGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGTTATATACAAAGACGGATGTGGTGCGAAATACnnnnGGCTCCTATTTCAAGTACCGTCTAATGTCAATAAGATGGTTTCGATGCGTGGAGAGAAACCCACTCTGAACGTCCCGATCACAGCGTTCGCTCTACTCCG\n4b85ee58466075729a68837e2b016ad7;size=1;+\nTGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGTTATATACAAAGACGGATGTGGTGCGAAATACnnnnGGCTCCTATTTCAAGTACCGTCTAATGTCAATAAGATGGTTTCGATGCGTGGAGAGAAACCCACTCTAAACGTCCCGATCACAGCGTTGGCTCTACTCCG']
## remove '>' from names, and '\n' from inside long seqs
lines1 = "".join(align1)[1:].split("\n>")
seed = [i for i in lines1 if i.split(";")[-1][0] == "*"][0]
lines1.pop(lines1.index(seed))
lines1 = [seed] + sorted(
lines1, key=_get_derep_num, reverse=True)
dalign1 = dict([i.split("\n", 1) for i in lines1])
lines2 = "".join(align2)[1:].split("\n>")
dalign2 = dict([i.split("\n", 1) for i in lines2])
#seed = [i for i in lines2 if i.split(";")[-1][0] == "*"][0]
#lines2.pop(lines2.index(seed))
## format remove extra newlines from muscle
aa = [i.split("\n", 1) for i in lines]
align1 = [i[0] + '\n' + "".join([j.replace("\n", "")
for j in i[1:]]) for i in aa]
align1
['>12f1ffdaa3a3d7c8310998dea05a56dd;size=22;*\n', 'TGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGT\n', 'TATATACAAAGACGGATGTGGTGCGAAATAC\n', '>1f4df54209f2d8d9be6019fde95d7579;size=1;+\n', 'TGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGT\n', 'TATATACAAAGACGGATGTGGTGCGAAATAC\n', '>4b85ee58466075729a68837e2b016ad7;size=1;+\n', 'TGCAGCCTGGTCAATAGCCCCCAATTGGTCGATCCCGTTTATACTTGCAGAACAAATCGT\n', 'TATATACAAAGACGGATGTGGTGCGAAATAC\n']
derephandle = os.path.join(data.tmpdir, sample.name + "_derep.fastq")
uhandle = os.path.join(data.dirs.clusts, sample.name + ".utemp")
temphandle = os.path.join(data.dirs.clusts, sample.name + ".htemp")
derepfile = os.path.join(data.tmpdir, sample.name + "_derep.fastq")
uhandle = os.path.join(data.dirs.clusts, sample.name + ".utemp")
usort = os.path.join(data.dirs.clusts, sample.name + ".utemp.sort")
hhandle = os.path.join(data.dirs.clusts, sample.name + ".htemp")
sample.files.clusters = os.path.join(
data.dirs.clusts, sample.name + ".clust.gz")
clustsout = gzip.open(sample.files.clusters, 'wt')
cmd = ["sort", "-k", "2", uhandle, "-o", usort]
proc = sps.Popen(cmd, close_fds=True)
proc.communicate()[0]
alldereps = {}
with open(derepfile, 'rt') as ioderep:
dereps = izip(*[iter(ioderep)] * 2)
for namestr, seq in dereps:
nnn, sss = [i.strip() for i in (namestr, seq)]
alldereps[nnn[1:]] = sss
seedsseen = set()
maxindels = 8
## Iterate through the usort file grabbing matches to build clusters
with open(usort, 'rt') as insort:
## iterator, seed null, seqlist null
isort = iter(insort)
lastseed = 0
fseqs = []
seqlist = []
seqsize = 0
while 1:
## grab the next line
try:
hit, seed, _, ind, ori, _ = next(isort).strip().split()
except StopIteration:
break
## same seed, append match
if seed != lastseed:
seedsseen.add(seed)
## store the last cluster (fseq), count it, and clear fseq
if fseqs:
## sort fseqs by derep after pulling out the seed
fseqs = [fseqs[0]] + sorted(fseqs[1:],
key=lambda x:
int(x.split(";size=")[1].split(";")[0]), reverse=True)
seqlist.append("\n".join(fseqs))
seqsize += 1
fseqs = []
# occasionally write/dump stored clusters to file and clear mem
if not seqsize % 10000:
if seqlist:
clustsout.write(
"\n//\n//\n".join(seqlist) + "\n//\n//\n")
## reset list and counter
seqlist = []
## store the new seed on top of fseq list
fseqs.append(">{}*\n{}".format(seed, alldereps[seed]))
lastseed = seed
## add match to the seed
## revcomp if orientation is reversed (comp preserves nnnn)
if ori == "-":
seq = comp(alldereps[hit])[::-1]
else:
seq = alldereps[hit]
## only save if not too many indels
if int(ind) <= maxindels:
fseqs.append(">{}{}\n{}".format(hit, ori, seq))
else:
ip.logger.info("filtered by maxindels: %s %s", ind, seq)
## write whatever is left over to the clusts file
if fseqs:
seqlist.append("\n".join(fseqs))
if seqlist:
clustsout.write("\n//\n//\n".join(seqlist) + "\n//\n//\n")
## now write the seeds that had no hits. Make dict from htemp
with open(hhandle, 'rt') as iotemp:
nohits = izip(*[iter(iotemp)] * 2)
seqlist = []
seqsize = 0
while 1:
try:
nnn, _ = [i.strip() for i in next(nohits)]
except StopIteration:
break
## occasionally write to file
if not seqsize % 10000:
if seqlist:
clustsout.write("\n//\n//\n".join(seqlist) + "\n//\n//\n")
## reset list and counter
seqlist = []
## append to list if new seed
if nnn[1:] not in seedsseen:
seqlist.append("{}*\n{}".format(nnn, alldereps[nnn[1:]]))
seqsize += 1
## write whatever is left over to the clusts file
if seqlist:
clustsout.write("\n//\n//\n".join(seqlist))
## close the file handle
clustsout.close()
del alldereps
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-25-0723860e8118> in <module>() 25 fseqs = [fseqs[0]] + sorted(fseqs[1:], 26 key=lambda x: ---> 27 int(x.split(";size=")[1].split(";")[0]), reverse=True) 28 seqlist.append("\n".join(fseqs)) 29 seqsize += 1 <ipython-input-25-0723860e8118> in <lambda>(x) 25 fseqs = [fseqs[0]] + sorted(fseqs[1:], 26 key=lambda x: ---> 27 int(x.split(";size=")[1].split(";")[0]), reverse=True) 28 seqlist.append("\n".join(fseqs)) 29 seqsize += 1 ValueError: invalid literal for int() with base 10: '1+\nTGCAGGTCACTTTTCAAGATACACTATTGTTATTACTGTGAGACACAAAGCTAATTCATCACTTCACGGATACCGCGTCCTCCTATAACGCnnnnCAATATTAACGCGTGAGTACCGGTTTCCTTGTGAGGAAGGCCCACTCTCAGTACCACCCTTATCCTATTCTAAGGCACACATGCATAGACCACTCAACCG
seedsseen
{'00519cc27c6ab8f71dcdef028ad184e8;size=12', '00a2c3fa80127d8adfc783464de05df4;size=10'}
sample.concatfiles = concat_multiple_edits(data, sample)
sample.mergedfile = merge_pairs(data, sample, 1, 1)
new_derep_and_sort(
data,
sample.mergedfile,
os.path.join(data.tmpdir, sample.name + "_derep.fastq"),
2)
data = data
sample = sample
revcomp = 1
vsearch_merge = 1
sample.concatfiles = concat_multiple_edits(data, sample)
sample.mergefile = os.path.join(data.tmpdir, sample.name + "_merged_.fastq")
sample.mergefile = os.path.join(data.tmpdir, sample.name + "_merged_.fastq")
if 'pair' in data.paramsdict['datatype']:
if "reference" not in data.paramsdict["assembly_method"]:
nmerged = ip.assemble.clustmap._merge_pairs(data, sample, 1, 1)
else:
nmerged = 0 # _merge_pairs(data, sample, 0, 0)
sample.stats.reads_merged = nmerged
new_derep_and_sort(data, sampl)
def new_derep_and_sort(data, infile, outfile, nthreads):
"""
Dereplicates reads and sorts so reads that were highly replicated are at
the top, and singletons at bottom, writes output to derep file. Paired
reads are dereplicated as one concatenated read and later split again.
Updated this function to take infile and outfile to support the double
dereplication that we need for 3rad (5/29/15 iao).
"""
## datatypes options
strand = "plus"
if data.paramsdict["datatype"] is ('gbs' or '2brad'):
strand = "both"
## do dereplication with vsearch
cmd = [
ip.bins.vsearch,
"--derep_fulllength", infile,
"--strand", strand,
"--output", outfile,
"--threads", str(nthreads),
"--fasta_width", str(0),
"--fastq_qmax", "1000",
"--sizeout",
"--relabel_md5",
]
ip.logger.info("derep cmd %s", cmd)
## build PIPEd job
proc = sps.Popen(cmd, stderr=sps.STDOUT, stdout=sps.PIPE, close_fds=True)
errmsg = proc.communicate()[0]
if proc.returncode:
ip.logger.error("error inside derep_and_sort %s", errmsg)
raise IPyradWarningExit(errmsg)
new_derep_and_sort()
## CONCAT FILES FOR MERGED ASSEMBLIES
mergefile = os.path.join(data.tmpdir, sample.name + "_merged_.fastq")
sample.files.edits = concat_multiple_edits(data, sample)
sample.files.edits = [(mergefile, )]
nmerged = ip.assemble.clustmap._merge_pairs(
data, sample.files.edits, mergefile, 1, 1)
sample.stats.reads_merged = nmerged
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-41-50484bfab4a1> in <module>() 1 sample.files.edits = [(mergefile, )] 2 nmerged = ip.assemble.clustmap._merge_pairs( ----> 3 data, sample.files.edits, mergefile, 1, 1) 4 sample.stats.reads_merged = nmerged ~/Documents/ipyrad/ipyrad/assemble/clustmap.py in _merge_pairs(data, two_files, merged_out, revcomp, merge) 693 else: 694 tmp1 = two_files[0][0] --> 695 tmp2 = two_files[0][1] 696 697 try: IndexError: tuple index out of range
mergefile
'/home/deren/Documents/ipyrad/pairtest-tmpalign/2E_0_merged_.fastq'
sample.files.edits
[('/home/deren/Documents/ipyrad/pairtest-tmpalign/2E_0_merged_.fastq',)]
s3.run()
[####################] 100% 0:00:00 | dereplicating | s3 | [####################] 100% 0:00:01 | clustering/mapping | s3 | [####################] 100% 0:00:00 | building clusters | s3 | [####################] 100% 0:00:00 | chunking clusters | s3 | [####################] 100% 0:00:15 | aligning clusters | s3 | [####################] 100% 0:00:00 | concat clusters | s3 |
# muscle align returns values for bad alignments
ip.assemble.cluster_within.sample_cleanup(data, samples[0])
data._build_stat("s3")
clusters_total | hidepth_min | clusters_hidepth | avg_depth_total | avg_depth_mj | avg_depth_stat | sd_depth_total | sd_depth_mj | sd_depth_stat | filtered_bad_align | |
---|---|---|---|---|---|---|---|---|---|---|
1A_0 | 1000.0 | 6.0 | 1000.0 | 19.862 | 19.862 | 19.862 | 2.830717 | 2.830717 | 2.830717 | 0.0 |
1B_0 | 1000.0 | 6.0 | 1000.0 | 20.043 | 20.043 | 20.043 | 2.807339 | 2.807339 | 2.807339 | 0.0 |
1C_0 | 1000.0 | 6.0 | 1000.0 | 20.136 | 20.136 | 20.136 | 2.874283 | 2.874283 | 2.874283 | 0.0 |
1D_0 | 1000.0 | 6.0 | 1000.0 | 19.966 | 19.966 | 19.966 | 2.738402 | 2.738402 | 2.738402 | 0.0 |
2E_0 | 1000.0 | 6.0 | 1000.0 | 20.017 | 20.017 | 20.017 | 2.778977 | 2.778977 | 2.778977 | 0.0 |
2F_0 | 1000.0 | 6.0 | 1000.0 | 19.933 | 19.933 | 19.933 | 2.833463 | 2.833463 | 2.833463 | 0.0 |
2G_0 | 1000.0 | 6.0 | 1000.0 | 20.030 | 20.030 | 20.030 | 2.773644 | 2.773644 | 2.773644 | 0.0 |
2H_0 | 1000.0 | 6.0 | 1000.0 | 20.199 | 20.199 | 20.199 | 2.870087 | 2.870087 | 2.870087 | 0.0 |
3I_0 | 1000.0 | 6.0 | 1000.0 | 19.885 | 19.885 | 19.885 | 3.012603 | 3.012603 | 3.012603 | 0.0 |
3J_0 | 1000.0 | 6.0 | 1000.0 | 19.822 | 19.822 | 19.822 | 2.878596 | 2.878596 | 2.878596 | 0.0 |
3K_0 | 1000.0 | 6.0 | 1000.0 | 19.965 | 19.965 | 19.965 | 2.885788 | 2.885788 | 2.885788 | 0.0 |
3L_0 | 1000.0 | 6.0 | 1000.0 | 20.008 | 20.008 | 20.008 | 2.904813 | 2.904813 | 2.904813 | 0.0 |
raise IPyradError("hi")
--------------------------------------------------------------------------- IPyradError Traceback (most recent call last) <ipython-input-7-5110f32adaa3> in <module>() ----> 1 raise IPyradError("hi") IPyradError: hi
self = data
derep_sort_map(s3.data, samples[0], s3.force)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-4-c34ce6e8f70d> in <module>() 1 self = data ----> 2 derep_sort_map(s3.data, samples[0], s3.force) TypeError: derep_sort_map() missing 1 required positional argument: 'nthreads'
ra.exception()
<Remote[2]:TypeError(derep_sort_map() missing 1 required positional argument: 'sample')>
data.get_params()