%%bash
mkdir -p analysis_pyrad

%%bash
pyrad -n

%%bash
sed -i '/## 1. /c\analysis_pyrad/      ## 1. working directory   ' params.txt
sed -i '/## 2. /c\/home/deren/Longiflorae/*.gz   ## 2. working directory     ' params.txt
sed -i '/## 3. /c\/home/deren/barcodes/cyatho_longi.barcodes  ## 3. barcodes ' params.txt
sed -i '/## 7. /c\12                   ## 7. N processors   ' params.txt
sed -i '/## 9. /c\6                    ## 9. NQual     ' params.txt
sed -i '/## 10. /c\.85                 ## 10. Wclust     ' params.txt
sed -i '/## 11. /c\pairgbs             ## 11. datatype   ' params.txt
sed -i '/## 14. /c\c85d6m4p3           ## 14. output prefix  ' params.txt
sed -i '/## 29./c\1,1                  ## 29. trim edges ' params.txt
sed -i '/## 30./c\p                    ## 30. output formats ' params.txt

cat params.txt

%%bash
pyrad -p params.txt -s 1

%%bash
head -n 100 analysis_pyrad/stats/s1.sorting.txt

%%bash
## For this example I subselect only the samples from 
## this library that start with the letter 'd'
gunzip analysis_pyrad/fastq/d*.gz

%%bash
for gfile in analysis_pyrad/fastq/*_R1.fq;
    do pear -f $gfile \
            -r ${gfile/_R1.fq/_R2.fq} \
            -o ${gfile/_R1.fq/} \
            -n 33 \
            -t 33 \
            -q 10 \
            -j 20  >> pear.log 2>&1;
done

%%bash
## set location of demultiplexed data that are 'pear' filtered
sed -i '/## 18./c\analysis_pyrad/fastq/*.assembled.fastq ## 18. demulti data loc ' ./params.txt

%%bash
sed -i '/## 11./c\merged            ## 11. data type ' ./params.txt

%%bash
pyrad -p params.txt -s 2

%%bash
cat analysis_pyrad/stats/s2.rawedit.txt

%%bash
pyrad -p params.txt -s 3

%%bash
cat analysis_pyrad/stats/s3 

%%bash
gzip -cd analysis_pyrad/clust.85/d35422.assembled.clustS.gz | head -n 100 | cut -b 1-80

%%bash
pyrad -p params.txt -s 4

%%bash
cat analysis_pyrad/stats/Pi_E_estimate.txt

%%bash
pyrad -p params.txt -s 5

%%bash
cat analysis_pyrad/stats/s5.consens.txt

%%bash
pyrad -p params.txt -s 6

%%bash
pyrad -p params.txt -s 7

%%bash
cat analysis_pyrad/stats/c88d6m4p3.stats

%%bash
head -n 100 analysis_pyrad/outfiles/c88d6m4p3.loci | cut -b 1-88

%%bash
raxmlHPC-PTHREADS-AVX -s analysis_pyrad/outfiles/c88d6m4p3.phy \
                      -m GTRGAMMA \
                      -f a \
                      -x 12345 \
                      -p 12345 \
                      -N 100 \
                      -w /home/deren/Documents/PEGBS/analysis_raxml/ \
                      -n c88d6m4p3 \
                      -o "d35320.assembled" \
                      -T 20 &> /dev/null

%load_ext rmagic

%%R
library(ape)
tre <- read.tree("analysis_raxml/RAxML_bipartitions.c88d6m4p3")
plot(tre)
nodelabels(tre$node.label)

%%bash
## This is the step 5 results after I moved the consens.gz files out of 
## the clust.85/ directory and replaced them with new consens calls 
## generated by running step 5 with the majority rule consensus 
## option turned on. 

tail -n 32 analysis_pyrad/stats/s5.consens.txt