#!/usr/bin/env python
# coding: utf-8

# ### Running in Docker container on Swoose
#
# Started Docker container with the following command:
#
# ```docker run -p 8888:8888 -v /home/sam/data/:/home/data -it bioinformatics/bioinformatics:v0 /bin/bash```
#
# The command allows access to Jupyter Notebook over port 8888 and makes
# /home/sam/data/ (my Jupyter Notebook GitHub repo and my data files)
# accessible to the Docker container.
#
# Once the container was started, started Jupyter Notebook with the following
# command inside the Docker container:
#
# ```jupyter notebook --allow-root```
#
# This is configured in the Docker container to launch a Jupyter Notebook
# without a browser on port 8888. The Docker container is running on an image
# created from this Dockerfile (Git commit
# [ece177e](https://github.com/RobertsLab/code/commit/ece177e2ad73b5eff5daa67d9000e198a704ff1d))
#
# NOTE: this script is a notebook export; it must be run under IPython because
# every cell goes through get_ipython().

# In[1]:


get_ipython().run_cell_magic('bash', '', 'date\n')


# In[2]:


get_ipython().run_cell_magic('bash', '', 'hostname\n')


# In[3]:


get_ipython().run_cell_magic('bash', '', 'lscpu\n')


# In[4]:


get_ipython().run_cell_magic('bash', '', 'free -mh\n')


# In[5]:


get_ipython().run_cell_magic('bash', '', 'pwd\n')


# #### Print list of files for easy copying/pasting into redundans command

# In[6]:


get_ipython().run_cell_magic('bash', '', 'for i in /home/data/*subreads*\ndo\necho "$i"\ndone\n')


# #### Print list of PacBio files for easy copying/pasting into redundans command

# In[7]:


get_ipython().run_cell_magic('bash', '', 'cd ..\nfor i in *subreads*\ndo\necho "$i"\ndone\n')


# In[8]:


get_ipython().run_cell_magic('bash', '', 'for i in 1[56]*\ndo\necho "$i"\ndone\n')


# In[9]:


get_ipython().run_cell_magic('bash', '', 'pwd\n')


# #### Print list of Illumina files for easy copying/pasting into redundans command

# In[10]:


get_ipython().run_cell_magic('bash', '', 'cd ..\nfor i in 1[56]*\ndo\necho "$i"\ndone\n')


# The four redundans invocations below (In[11], In[13], In[14], In[15]) share
# the same script path, thread count and read lists; they differ only in the
# arguments placed before -o and in the output directory. The shared parts are
# defined once here so each attempt shows just what changed.

REDUNDANS_SCRIPT = '/usr/local/bioinformatics/redundans/redundans.py'

# Files passed to redundans' -l option (file names are relative paths).
PACBIO_READS = [
    'm170211_224036_42134_c101073082550000001823236402101737_s1_X0_filtered_subreads.fastq.gz',
    'm170301_100013_42134_c101174162550000001823269408211761_s1_p0_filtered_subreads.fastq.gz',
    'm170301_162825_42134_c101174162550000001823269408211762_s1_p0_filtered_subreads.fastq.gz',
    'm170301_225711_42134_c101174162550000001823269408211763_s1_p0_filtered_subreads.fastq.gz',
    'm170308_163922_42134_c101174252550000001823269408211742_s1_p0_filtered_subreads.fastq.gz',
    'm170308_230815_42134_c101174252550000001823269408211743_s1_p0_filtered_subreads.fastq.gz',
    'm170315_001112_42134_c101169372550000001823273008151717_s1_p0_filtered_subreads.fastq.gz',
    'm170315_063041_42134_c101169382550000001823273008151700_s1_p0_filtered_subreads.fastq.gz',
    'm170315_124938_42134_c101169382550000001823273008151701_s1_p0_filtered_subreads.fastq.gz',
    'm170315_190851_42134_c101169382550000001823273008151702_s1_p0_filtered_subreads.fastq.gz',
]

# Files passed to redundans' -i option (listed as _1/_2 pairs).
ILLUMINA_READS = [
    '151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_1.fq.gz',
    '151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_2.fq.gz',
    '151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_1.fq.gz',
    '151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_2.fq.gz',
    '151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_1.fq.gz',
    '151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_2.fq.gz',
    '160103_I137_FCH3V5YBBXX_L3_WHOSTibkDCABDLAAPEI-62_1.fq.gz',
    '160103_I137_FCH3V5YBBXX_L3_WHOSTibkDCABDLAAPEI-62_2.fq.gz',
    '160103_I137_FCH3V5YBBXX_L3_WHOSTibkDCACDTAAPEI-75_1.fq.gz',
    '160103_I137_FCH3V5YBBXX_L3_WHOSTibkDCACDTAAPEI-75_2.fq.gz',
    '160103_I137_FCH3V5YBBXX_L4_WHOSTibkDCABDLAAPEI-62_1.fq.gz',
    '160103_I137_FCH3V5YBBXX_L4_WHOSTibkDCABDLAAPEI-62_2.fq.gz',
    '160103_I137_FCH3V5YBBXX_L4_WHOSTibkDCACDTAAPEI-75_1.fq.gz',
    '160103_I137_FCH3V5YBBXX_L4_WHOSTibkDCACDTAAPEI-75_2.fq.gz',
    '160103_I137_FCH3V5YBBXX_L5_WHOSTibkDCAADWAAPEI-74_1.fq.gz',
    '160103_I137_FCH3V5YBBXX_L5_WHOSTibkDCAADWAAPEI-74_2.fq.gz',
    '160103_I137_FCH3V5YBBXX_L6_WHOSTibkDCAADWAAPEI-74_1.fq.gz',
    '160103_I137_FCH3V5YBBXX_L6_WHOSTibkDCAADWAAPEI-74_2.fq.gz',
]


def _redundans_command(outdir, extra_args=()):
    """Return the redundans shell command used by the cells below.

    outdir is the value given to -o. extra_args is a sequence of tokens
    inserted, space-separated and unmodified, between the -i file list and
    -o (used for the -f FASTA argument). The result is a single
    space-joined string suitable for get_ipython().system().
    """
    tokens = (
        ['python', REDUNDANS_SCRIPT, '-t', '24', '-l']
        + PACBIO_READS
        + ['-i']
        + ILLUMINA_READS
        + list(extra_args)
        + ['-o', outdir]
    )
    return ' '.join(tokens)


# In[11]:


# First attempt: no -f argument.
get_ipython().system(_redundans_command('/home/data/20171003_redundans/'))


# #### This isn't the cause of the error message above, but just realized I'm in the wrong directory.

# In[12]:


# A bare `cd ..` is IPython automagic and a syntax error in a Python file;
# run_line_magic is the explicit spelling of the same %cd call.
get_ipython().run_line_magic('cd', '..')


# In[13]:


# Kept verbatim as a record of the failed attempt: the stray space in "- f"
# is the error called out in the note that follows.
get_ipython().system(
    _redundans_command(
        '/home/data/20171003_redundans/',
        ['-', 'f', 'oly_pacbio_.contigs.fasta'],
    )
)


# #### Ah, I see the error now! Need to remove space between hyphen and "f" option!

# In[14]:


get_ipython().system(
    _redundans_command(
        '/home/data/20171003_redundans/',
        ['-f', 'oly_pacbio_.contigs.fasta'],
    )
)


# ### Run redundans
#
# #### The FASTA file used below is an assembly generated by [Sean with Canu](http://owl.fish.washington.edu/scaphapoda/Sean/Oly_Canu_Output/)

# In[15]:


# Same command as In[14], writing to a different output directory.
get_ipython().system(
    _redundans_command(
        '/home/data/20171003_redundans_01/',
        ['-f', 'oly_pacbio_.contigs.fasta'],
    )
)


# ### Compare to Sean's redundans runs

# #### Set file locations as variables

# In[16]:


sb_redundans1 = "/home/owl/scaphapoda/Sean/Oly_Redundans_Output/scaffolds.reduced.fa"
sb_redundans2 = "/home/owl/scaphapoda/Sean/Oly_Redundans_Output_Try_2/scaffolds.reduced.fa"
# NOTE: the name is misspelled ("reduandans"); it is kept because the QUAST
# commands below reference it as $sjw_reduandans.
sjw_reduandans = "/home/data/20171003_redundans_01/scaffolds.reduced.fa"


# #### Run QUAST to assess assemblies
#
# ##### First, I'll run without --scaffolds option.

# In[17]:


# The $name tokens are filled in by IPython from the variables set in In[16].
get_ipython().system('python /usr/local/bioinformatics/quast-4.5/quast.py -t 24 $sb_redundans1 $sb_redundans2 $sjw_reduandans')


# In[18]:


get_ipython().run_cell_magic('bash', '', 'cat quast_results/results_2017_10_05_14_21_50/report.txt\n')


# #### Run with --scaffolds option

# In[19]:


get_ipython().system('python /usr/local/bioinformatics/quast-4.5/quast.py --scaffolds -t 24 $sb_redundans1 $sb_redundans2 $sjw_reduandans')


# In[20]:


# Identical command to In[19], run a second time in the original notebook.
get_ipython().system('python /usr/local/bioinformatics/quast-4.5/quast.py --scaffolds -t 24 $sb_redundans1 $sb_redundans2 $sjw_reduandans')


# In[21]:


get_ipython().run_cell_magic('bash', '', 'cat quast_results/results_2017_10_05_14_28_51/report.txt\n')


# In[22]:


get_ipython().run_cell_magic('bash', '', 'cp -R quast_results/ /home/owl/Athaliana/\n')


# In[23]:


get_ipython().run_cell_magic('bash', '', 'cp -r quast_results/ /home/owl/Athaliana/\n')


# In[24]:


get_ipython().run_cell_magic('bash', '', 'mkdir /home/owl/Athaliana/quast_results\n')


# In[25]:


get_ipython().run_cell_magic('bash', '', 'cp -r quast_results/ /home/owl/Athaliana/quast_results/\n')


# In[26]:


get_ipython().run_cell_magic('bash', '', 'cp -r quast_results/results_2017_10_05_14_21_50/ /home/owl/Athaliana/quast_results/\n')


# In[27]:


get_ipython().run_cell_magic('bash', '', 'cp -r quast_results/results_2017_10_05_14_28_51/ /home/owl/Athaliana/quast_results/\n')


# In[28]:


get_ipython().run_cell_magic('bash', '', 'ls /home/owl/Athaliana/quast_results/\n')


# In[29]:


get_ipython().run_cell_magic('bash', '', 'rm -rf /home/owl/Athaliana/quast_results/\n')


# In[30]:


get_ipython().run_cell_magic('bash', '', 'cp -r quast_results/ /home/owl/Athaliana/\n')


# In[31]:


get_ipython().run_cell_magic('bash', '', 'ls /home/owl/Athaliana/quast_results/\n')


# ### Results of first Quast run (i.e. without --scaffolds option)

# In[32]:


get_ipython().run_cell_magic('html', '', 'http://owl.fish.washington.edu/Athaliana/quast_results/results_2017_10_05_14_21_50/report.html\n')


# ### Results of second Quast run (i.e. using --scaffolds option)

# In[33]:


get_ipython().run_cell_magic('html', '', 'http://owl.fish.washington.edu/Athaliana/quast_results/results_2017_10_05_14_28_51/report.html\n')


# In[34]:


# A bare `cp -r ...` is IPython automagic (the cp alias) and a syntax error in
# a Python file; run_line_magic invokes the same alias explicitly.
get_ipython().run_line_magic('cp', '-r /home/data/20171003_redundans_01/ /home/owl/Athaliana/')


# In[35]:


get_ipython().run_cell_magic('bash', '', 'ls /home/owl/Athaliana/20171003_redundans_01/\n')


# In[36]:


get_ipython().run_cell_magic('bash', '', 'mv /home/owl/Athaliana/20171003_redundans_01/ /home/owl/Athaliana/20171004_redundans/\n')


# In[ ]: