%%bash
# Record when and where this notebook was run: current date, OS release,
# hostname, CPU details and memory usage, each section set off by a rule.

# Print the horizontal rule used between sections.
rule() { printf '%s\n' "------------"; }

# Print a single empty line.
blank() { printf '\n'; }

printf '%s\n' "TODAY'S DATE:"
date
rule
blank

# Display operating system info
lsb_release -a
blank
rule

printf '%s\n' "HOSTNAME: "
hostname
blank
rule

printf '%s\n' "Computer Specs:"
blank
lscpu
blank
rule
blank

printf '%s\n' "Memory Specs"
blank
free -mh
TODAY'S DATE: Thu Jan 10 09:51:26 PST 2019 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.5 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: swoose ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 24 On-line CPU(s) list: 0-23 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 44 Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz Stepping: 2 CPU MHz: 2925.866 BogoMIPS: 5851.93 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 12288K NUMA node0 CPU(s): 0-23 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat ------------ Memory Specs total used free shared buff/cache available Mem: 70G 7.8G 43G 845M 19G 61G Swap: 4.7G 616M 4.1G
No LSB modules are available.
# Variables shared with the bash cells below through the process environment.
# Comments are kept on their own lines: text following a value on a %env line
# would become part of the value.

# Assembled contigs FASTA that is passed to MetaQUAST as its input
%env assembly=/home/sam/data/metagenomics/P_generosa/final.contigs.fa
# Directory the bash cells cd into before running MetaQUAST; stdout.txt,
# sterr.txt and quast_results/ are written here
%env out_dir=/home/sam/analyses/20190110_geo_metagnomics_metaquast
# MetaQUAST driver script (from the quast-5.0.2 install) that is run with python
%env metaquast=/home/sam/programs/quast-5.0.2/metaquast.py
# Value handed to MetaQUAST's --threads option
%env threads=22
env: assembly=/home/sam/data/metagenomics/P_generosa/final.contigs.fa env: out_dir=/home/sam/analyses/20190110_geo_metagnomics_metaquast env: metaquast=/home/sam/programs/quast-5.0.2/metaquast.py env: threads=22
The cell above was later edited to add the `threads` variable.
The `--parents` option allows creation of multiple, nested directories.
%%bash
# Create the directory that holds the assembly and the directory MetaQUAST
# runs in. Both paths come from the ${assembly} and ${out_dir} environment
# variables instead of being typed out a second time, so they cannot drift
# from the values the later cells use.

# Stop with a clear message if either variable is missing from the environment.
: "${assembly:?assembly is not set}" "${out_dir:?out_dir is not set}"

# --parents creates missing intermediate directories and is a no-op for
# directories that already exist.
mkdir --parents -- "$(dirname -- "${assembly}")" "${out_dir}"
%%bash
# Copy the MEGAHIT assembly from gannet to the local data directory, then list
# the local copy to confirm it is in place.

# Abort on the first failure (including an unset variable) so a failed
# transfer is never followed by a listing of a stale or missing file.
set -euo pipefail

# Download into the directory part of ${assembly}, so the file lands exactly
# where the listing below and the MetaQUAST cells expect it.
assembly_dir="$(dirname -- "${assembly}")"

# The trailing slash makes rsync treat the destination as a directory even if
# it does not exist yet, rather than creating a file with that name.
rsync \
  --archive \
  --verbose \
  --progress \
  gannet:/volume1/web/Atumefaciens/20190102_metagenomics_geo_megahit/megahit_out/final.contigs.fa \
  "${assembly_dir}/"

#Uncomment below to download via web URL
# Will need to set desired path with --directory-prefix
#wget \
#--directory-prefix=/home/sam/data/metagenomics/P_generosa/ \
#https://gannet.fish.washington.edu/Atumefaciens/20190102_metagenomics_geo_megahit/megahit_out/final.contigs.fa

echo "-------------------------"
ls -lh -- "${assembly}"
receiving incremental file list final.contigs.fa 2,341,712,011 100% 81.51MB/s 0:00:27 (xfr#1, to-chk=0/1) sent 30 bytes received 2,341,997,968 bytes 82,175,368.35 bytes/sec total size is 2,341,712,011 speedup is 1.00 ------------------------- -rw-r--r-- 1 sam users 2.2G Jan 3 06:08 /home/sam/data/metagenomics/P_generosa/final.contigs.fa
%%bash
# First MetaQUAST attempt: run on the assembly from inside ${out_dir}, without
# a --threads option.

# Refuse to continue if out_dir is unset/empty or the directory is missing;
# a bare `cd` would otherwise move to $HOME and the output files would be
# written there.
cd "${out_dir:?out_dir is not set}" || exit 1

# `time` reports the wall/CPU time of the run on the cell's stderr.
# MetaQUAST's stdout and stderr are captured in stdout.txt and sterr.txt in
# the current directory (file names kept exactly as in the original cell).
time \
python "${metaquast}" "${assembly}" \
  1> stdout.txt \
  2> sterr.txt
Process is terminated.
Killed the process in order to add a `threads` variable to the variables cell.
%%bash
# Second MetaQUAST attempt: same command, now passing the thread count.
#
# The run recorded for this cell was launched without a trailing backslash
# after the --threads argument. That ended the python command early, so the
# two redirections ran as a separate, empty command and MetaQUAST's output
# was not captured in the files. The backslash is restored here.

# Refuse to continue if out_dir is unset/empty or the directory is missing;
# a bare `cd` would otherwise move to $HOME.
cd "${out_dir:?out_dir is not set}" || exit 1

# stdout and stderr of MetaQUAST go to stdout.txt and sterr.txt in ${out_dir}.
time \
python "${metaquast}" "${assembly}" \
  --threads "${threads}" \
  1> stdout.txt \
  2> sterr.txt
Process is terminated.
Noticed a missing line-continuation backslash after the `--threads` argument.
%%bash
# MetaQUAST run with the thread count and every continuation backslash in
# place.

# Refuse to continue if out_dir is unset/empty or the directory is missing;
# a bare `cd` would otherwise move to $HOME and the output files would be
# written there.
cd "${out_dir:?out_dir is not set}" || exit 1

# `time` reports the run's duration on the cell's stderr; MetaQUAST's own
# stdout and stderr are written to stdout.txt and sterr.txt in ${out_dir}.
time \
python "${metaquast}" "${assembly}" \
  --threads "${threads}" \
  1> stdout.txt \
  2> sterr.txt
real 45m52.835s user 201m29.228s sys 11m50.816s
--------------------------------------------------------------------------- CalledProcessError Traceback (most recent call last) <ipython-input-9-86f57cd2130f> in <module> ----> 1 get_ipython().run_cell_magic('bash', '', 'cd ${out_dir}\n\ntime \\\npython ${metaquast} ${assembly} \\\n--threads ${threads} \\\n1> stdout.txt \\\n2> sterr.txt\n') ~/programs/minicocnda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell) 2321 magic_arg_s = self.var_expand(line, stack_depth) 2322 with self.builtin_trap: -> 2323 result = fn(magic_arg_s, cell) 2324 return result 2325 ~/programs/minicocnda3/lib/python3.7/site-packages/IPython/core/magics/script.py in named_script_magic(line, cell) 140 else: 141 line = script --> 142 return self.shebang(line, cell) 143 144 # write a basic docstring: <decorator-gen-109> in shebang(self, line, cell) ~/programs/minicocnda3/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k) 185 # but it's overkill for just that one bit of state. 186 def magic_deco(arg): --> 187 call = lambda f, *a, **k: f(*a, **k) 188 189 if callable(arg): ~/programs/minicocnda3/lib/python3.7/site-packages/IPython/core/magics/script.py in shebang(self, line, cell) 243 sys.stderr.flush() 244 if args.raise_error and p.returncode!=0: --> 245 raise CalledProcessError(p.returncode, cell, output=out, stderr=err) 246 247 def _run_script(self, p, cell, to_close): CalledProcessError: Command 'b'cd ${out_dir}\n\ntime \\\npython ${metaquast} ${assembly} \\\n--threads ${threads} \\\n1> stdout.txt \\\n2> sterr.txt\n'' returned non-zero exit status 148.
%%bash
# Show what MetaQUAST wrote to stdout during the run above.
# The path is quoted and out_dir must be set, so an empty variable cannot
# silently turn this into a read of /stdout.txt.
cat -- "${out_dir:?out_dir is not set}/stdout.txt"
/home/sam/programs/quast-5.0.2/metaquast.py /home/sam/data/metagenomics/P_generosa/final.contigs.fa --threads 22 Version: 5.0.2 System information: OS: Linux-4.4.0-139-generic-x86_64-with-debian-stretch-sid (linux_64) Python version: 3.7.2 CPUs number: 24 Started: 2019-01-10 10:09:05 Logging to /home/sam/analyses/20190110_geo_metagnomics_metaquast/quast_results/results_2019_01_10_10_09_05/metaquast.log Contigs: Pre-processing... /home/sam/data/metagenomics/P_generosa/final.contigs.fa ==> final.contigs No references are provided, starting to search for reference genomes in SILVA 16S rRNA database and to download them from NCBI... 2019-01-10 10:10:37 Running BlastN.. processing final.contigs BLAST results for final.contigs are saved to /home/sam/analyses/20190110_geo_metagnomics_metaquast/quast_results/results_2019_01_10_10_09_05/quast_downloaded_references/blast.res_final-contigs... 2019-01-10 10:54:54 Trying to download found references from NCBI. Totally 50 organisms to try. unidentified_proteobacterium | not found in the NCBI database unidentified_proteobacterium was not found in NCBI database, trying to download the next best match ERROR! Cannot established internet connection to download reference genomes! Check internet connection or run MetaQUAST with option "--max-ref-number 0". In case you have troubles running QUAST, you can write to quast.support@cab.spbu.ru or report an issue on our GitHub repository https://github.com/ablab/quast/issues Please provide us with quast.log file from the output directory.
Well, that's weird. Let's try it again; maybe there was a blip in internet access? Hopefully this isn't related to the government shutdown...
%%bash
# Retry of the threaded MetaQUAST run, unchanged, to see whether the
# reference download succeeds on a second attempt.

# Refuse to continue if out_dir is unset/empty or the directory is missing;
# a bare `cd` would otherwise move to $HOME and the output files would be
# written there.
cd "${out_dir:?out_dir is not set}" || exit 1

# stdout.txt and sterr.txt from the previous attempt are overwritten.
time \
python "${metaquast}" "${assembly}" \
  --threads "${threads}" \
  1> stdout.txt \
  2> sterr.txt
real 45m52.503s user 200m24.208s sys 11m50.328s
--------------------------------------------------------------------------- CalledProcessError Traceback (most recent call last) <ipython-input-11-86f57cd2130f> in <module> ----> 1 get_ipython().run_cell_magic('bash', '', 'cd ${out_dir}\n\ntime \\\npython ${metaquast} ${assembly} \\\n--threads ${threads} \\\n1> stdout.txt \\\n2> sterr.txt\n') ~/programs/minicocnda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell) 2321 magic_arg_s = self.var_expand(line, stack_depth) 2322 with self.builtin_trap: -> 2323 result = fn(magic_arg_s, cell) 2324 return result 2325 ~/programs/minicocnda3/lib/python3.7/site-packages/IPython/core/magics/script.py in named_script_magic(line, cell) 140 else: 141 line = script --> 142 return self.shebang(line, cell) 143 144 # write a basic docstring: <decorator-gen-109> in shebang(self, line, cell) ~/programs/minicocnda3/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k) 185 # but it's overkill for just that one bit of state. 186 def magic_deco(arg): --> 187 call = lambda f, *a, **k: f(*a, **k) 188 189 if callable(arg): ~/programs/minicocnda3/lib/python3.7/site-packages/IPython/core/magics/script.py in shebang(self, line, cell) 243 sys.stderr.flush() 244 if args.raise_error and p.returncode!=0: --> 245 raise CalledProcessError(p.returncode, cell, output=out, stderr=err) 246 247 def _run_script(self, p, cell, to_close): CalledProcessError: Command 'b'cd ${out_dir}\n\ntime \\\npython ${metaquast} ${assembly} \\\n--threads ${threads} \\\n1> stdout.txt \\\n2> sterr.txt\n'' returned non-zero exit status 148.
%%bash
# Show MetaQUAST's stdout from the retry.
# The path is quoted and out_dir must be set, so an empty variable cannot
# silently turn this into a read of /stdout.txt.
cat -- "${out_dir:?out_dir is not set}/stdout.txt"
/home/sam/programs/quast-5.0.2/metaquast.py /home/sam/data/metagenomics/P_generosa/final.contigs.fa --threads 22 Version: 5.0.2 System information: OS: Linux-4.4.0-139-generic-x86_64-with-debian-stretch-sid (linux_64) Python version: 3.7.2 CPUs number: 24 Started: 2019-01-10 11:10:58 Logging to /home/sam/analyses/20190110_geo_metagnomics_metaquast/quast_results/results_2019_01_10_11_10_58/metaquast.log Contigs: Pre-processing... /home/sam/data/metagenomics/P_generosa/final.contigs.fa ==> final.contigs No references are provided, starting to search for reference genomes in SILVA 16S rRNA database and to download them from NCBI... 2019-01-10 11:12:29 Running BlastN.. processing final.contigs BLAST results for final.contigs are saved to /home/sam/analyses/20190110_geo_metagnomics_metaquast/quast_results/results_2019_01_10_11_10_58/quast_downloaded_references/blast.res_final-contigs... 2019-01-10 11:56:47 Trying to download found references from NCBI. Totally 50 organisms to try. unidentified_proteobacterium | not found in the NCBI database unidentified_proteobacterium was not found in NCBI database, trying to download the next best match ERROR! Cannot established internet connection to download reference genomes! Check internet connection or run MetaQUAST with option "--max-ref-number 0". In case you have troubles running QUAST, you can write to quast.support@cab.spbu.ru or report an issue on our GitHub repository https://github.com/ablab/quast/issues Please provide us with quast.log file from the output directory.
Well, while I wait to hear from the developer, I'll run this with the `--max-ref-number 0` option and see how it looks.
%%bash
# Run MetaQUAST with --max-ref-number 0, the option its error message
# suggests when reference genomes cannot be downloaded, then list everything
# under ${out_dir}.

# Refuse to continue if out_dir is unset/empty or the directory is missing;
# a bare `cd` would otherwise move to $HOME and the output files would be
# written there.
cd "${out_dir:?out_dir is not set}" || exit 1

# stdout and stderr of MetaQUAST go to stdout.txt and sterr.txt in ${out_dir}.
time \
python "${metaquast}" "${assembly}" \
  --max-ref-number 0 \
  --threads "${threads}" \
  1> stdout.txt \
  2> sterr.txt

# Recursive long listing of the output directory, including earlier runs.
ls -lhR
.: total 16K drwxrwxr-x 7 sam sam 4.0K Jan 10 14:04 quast_results -rw-rw-r-- 1 sam sam 4.1K Jan 10 14:10 stdout.txt -rw-rw-r-- 1 sam sam 187 Jan 10 14:10 sterr.txt ./quast_results: total 20K lrwxrwxrwx 1 sam sam 27 Jan 10 14:04 latest -> results_2019_01_10_14_04_35 drwxrwxr-x 3 sam sam 4.0K Jan 10 10:04 results_2019_01_10_10_04_44 drwxrwxr-x 3 sam sam 4.0K Jan 10 10:07 results_2019_01_10_10_07_30 drwxrwxr-x 4 sam sam 4.0K Jan 10 10:10 results_2019_01_10_10_09_05 drwxrwxr-x 4 sam sam 4.0K Jan 10 11:12 results_2019_01_10_11_10_58 drwxrwxr-x 4 sam sam 4.0K Jan 10 14:10 results_2019_01_10_14_04_35 ./quast_results/results_2019_01_10_10_04_44: total 8.0K -rw-rw-r-- 1 sam sam 521 Jan 10 10:04 metaquast.log drwxrwxr-x 2 sam sam 4.0K Jan 10 10:04 quast_corrected_input ./quast_results/results_2019_01_10_10_04_44/quast_corrected_input: total 0 ./quast_results/results_2019_01_10_10_07_30: total 8.0K -rw-rw-r-- 1 sam sam 446 Jan 10 10:07 metaquast.log drwxrwxr-x 2 sam sam 4.0K Jan 10 10:07 quast_corrected_input ./quast_results/results_2019_01_10_10_07_30/quast_corrected_input: total 0 ./quast_results/results_2019_01_10_10_09_05: total 12K -rw-rw-r-- 1 sam sam 2.0K Jan 10 10:54 metaquast.log drwxrwxr-x 2 sam sam 4.0K Jan 10 10:09 quast_corrected_input drwxrwxr-x 2 sam sam 4.0K Jan 10 10:54 quast_downloaded_references ./quast_results/results_2019_01_10_10_09_05/quast_corrected_input: total 0 ./quast_results/results_2019_01_10_10_09_05/quast_downloaded_references: total 408M -rw-rw-r-- 1 sam sam 113 Jan 10 10:54 blast.check_final-contigs -rw-rw-r-- 1 sam sam 0 Jan 10 10:10 blast.err -rw-rw-r-- 1 sam sam 408M Jan 10 10:54 blast.res_final-contigs ./quast_results/results_2019_01_10_11_10_58: total 12K -rw-rw-r-- 1 sam sam 2.0K Jan 10 11:56 metaquast.log drwxrwxr-x 2 sam sam 4.0K Jan 10 11:10 quast_corrected_input drwxrwxr-x 2 sam sam 4.0K Jan 10 11:56 quast_downloaded_references ./quast_results/results_2019_01_10_11_10_58/quast_corrected_input: total 0 
./quast_results/results_2019_01_10_11_10_58/quast_downloaded_references: total 408M -rw-rw-r-- 1 sam sam 113 Jan 10 11:56 blast.check_final-contigs -rw-rw-r-- 1 sam sam 0 Jan 10 11:12 blast.err -rw-rw-r-- 1 sam sam 408M Jan 10 11:56 blast.res_final-contigs ./quast_results/results_2019_01_10_14_04_35: total 796K drwxrwxr-x 2 sam sam 4.0K Jan 10 14:09 basic_stats -rw-rw-r-- 1 sam sam 53K Jan 10 14:10 icarus.html drwxrwxr-x 2 sam sam 4.0K Jan 10 14:10 icarus_viewers -rw-rw-r-- 1 sam sam 747 Jan 10 14:06 metaquast.log -rw-rw-r-- 1 sam sam 3.4K Jan 10 14:10 quast.log -rw-rw-r-- 1 sam sam 670K Jan 10 14:10 report.html -rw-rw-r-- 1 sam sam 28K Jan 10 14:10 report.pdf -rw-rw-r-- 1 sam sam 1.3K Jan 10 14:09 report.tex -rw-rw-r-- 1 sam sam 554 Jan 10 14:09 report.tsv -rw-rw-r-- 1 sam sam 1.1K Jan 10 14:09 report.txt -rw-rw-r-- 1 sam sam 1.1K Jan 10 14:09 transposed_report.tex -rw-rw-r-- 1 sam sam 554 Jan 10 14:09 transposed_report.tsv -rw-rw-r-- 1 sam sam 989 Jan 10 14:09 transposed_report.txt ./quast_results/results_2019_01_10_14_04_35/basic_stats: total 64K -rw-rw-r-- 1 sam sam 14K Jan 10 14:09 cumulative_plot.pdf -rw-rw-r-- 1 sam sam 13K Jan 10 14:09 final.contigs_GC_content_plot.pdf -rw-rw-r-- 1 sam sam 14K Jan 10 14:09 GC_content_plot.pdf -rw-rw-r-- 1 sam sam 14K Jan 10 14:09 Nx_plot.pdf ./quast_results/results_2019_01_10_14_04_35/icarus_viewers: total 824K -rw-rw-r-- 1 sam sam 823K Jan 10 14:10 contig_size_viewer.html
real 5m38.084s user 5m27.896s sys 0m12.744s
%%bash
# Show the MetaQUAST log of the --max-ref-number 0 run (results directory
# named after that run's start time).
# The path is quoted and out_dir must be set, so an empty variable cannot
# silently point this at /quast_results/...
cat -- "${out_dir:?out_dir is not set}/quast_results/results_2019_01_10_14_04_35/metaquast.log"
/home/sam/programs/quast-5.0.2/metaquast.py /home/sam/data/metagenomics/P_generosa/final.contigs.fa --max-ref-number 0 --threads 22 Version: 5.0.2 System information: OS: Linux-4.4.0-139-generic-x86_64-with-debian-stretch-sid (linux_64) Python version: 3.7.2 CPUs number: 24 Started: 2019-01-10 14:04:35 Logging to /home/sam/analyses/20190110_geo_metagnomics_metaquast/quast_results/results_2019_01_10_14_04_35/metaquast.log Contigs: Pre-processing... /home/sam/data/metagenomics/P_generosa/final.contigs.fa ==> final.contigs NOTICE: Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled NOTICE: No references are provided, starting regular QUAST with MetaGeneMark gene finder
OK, that didn't do anything other than just run regular QUAST. Going to give the initial command another go and see if the internet connection succeeds this time (not sure why it would, though)...
%%bash
# Another attempt at the original threaded MetaQUAST command (no
# --max-ref-number), to see whether the reference download works this time.

# Refuse to continue if out_dir is unset/empty or the directory is missing;
# a bare `cd` would otherwise move to $HOME and the output files would be
# written there.
cd "${out_dir:?out_dir is not set}" || exit 1

# stdout.txt and sterr.txt from the previous run are overwritten.
time \
python "${metaquast}" "${assembly}" \
  --threads "${threads}" \
  1> stdout.txt \
  2> sterr.txt