#!/usr/bin/env python
# coding: utf-8

# ### Archiving of Ostrea lurida (Olympia oyster) MBD-BSseq Data from ZymoResearch
#
# Exported notebook. Every step is a bash cell dispatched through the object
# returned by get_ipython(), so this script only works inside an
# IPython/Jupyter session. The cells inventory the delivered *.gz files in
# /Volumes/owl_home/mbdseq/, write md5 checksums, count reads, and move the
# files to /Volumes/owl_web/nightingales/O_lurida/20160203_mbdseq/.

# In[1]:


get_ipython().system('date')


# ##### Print system info

# In[2]:


get_ipython().run_cell_magic('bash', '', 'system_profiler SPSoftwareDataType\n')


# ##### List files provided by ZymoResearch

# In[6]:


get_ipython().run_cell_magic('bash', '', 'ls /Volumes/owl_home/mbdseq/\n')


# ##### Count the number of files

# In[4]:


get_ipython().run_cell_magic('bash', '', 'ls -1 /Volumes/owl_home/mbdseq/ | wc -l\n')


# ##### Generate md5 checksums file

# In[5]:


# The loop appends (>>) one md5 line per *.gz file, so re-running this cell
# adds a second set of entries to an existing checksums.md5.
get_ipython().run_cell_magic(
    'bash',
    '',
    '\n'
    '#For loop generates a md5 checksum has value for each file\n'
    '#and appends the output to the checksums.md5 file.\n'
    'time for file in /Volumes/owl_home/mbdseq/*.gz\n'
    ' do\n'
    ' md5 "$file" >> /Volumes/owl_home/mbdseq/checksums.md5\n'
    ' done\n',
)


# ##### Calculate total number of reads generated by this project

# In[7]:


# A space must precede '#' for bash to treat the rest of the line as a
# comment. Written as "totalreads=0#Initializes variable." the shell instead
# tries to run a command named "variable." and never initialises totalreads.
get_ipython().run_cell_magic(
    'bash',
    '',
    '\n'
    'totalreads=0 #Initializes variable.\n'
    '\n'
    '#For loop counts the lines in each file and divides them by four. This is performed because\n'
    '#Illumina sequencing files are composed of four lines per read.\n'
    '#A running total of the total number of reads is generated [totalreads=$((readcount+totalreads))]\n'
    '#and is printed after the for loop completes.\n'
    'time for file in /Volumes/owl_home/mbdseq/*.gz\n'
    ' do linecount=`gunzip -c "$file" | wc -l`\n'
    ' readcount=$((linecount/4))\n'
    ' totalreads=$((readcount+totalreads))\n'
    'done\n'
    'echo $totalreads\n',
)


# ##### Calculate number of reads per file and append filename and corresponding number of reads to readme file

# In[8]:


# "${file##*/}" strips the directory part so only the file name is written.
# The doubled backslashes below reach bash as \t and \n for printf.
get_ipython().run_cell_magic(
    'bash',
    '',
    '\n'
    '#For loop counts the lines in each file and divides them by four. This is performed because\n'
    '#Illumina sequencing files are composed of four lines per read.\n'
    '#Format the output (printf) to print the filename, followed by a tab, followed by the readcount.\n'
    '#The command "tee -a" is used to both print the output to the screen and append the output to the readme.md file.\n'
    'time for file in /Volumes/owl_home/mbdseq/*.gz\n'
    ' do linecount=`gunzip -c "$file" | wc -l`\n'
    ' readcount=$(($linecount/4))\n'
    ' printf "%s\\t%s\\n\\n" "${file##*/}" "$readcount" | tee -a /Volumes/owl_web/nightingales/O_lurida/20160203_mbdseq/readme.md\n'
    'done\n',
)


# ##### Move files to Owl web folder

# In[12]:


get_ipython().run_cell_magic(
    'bash',
    '',
    '\n'
    'time mv -n /Volumes/owl_home/mbdseq/*.gz /Volumes/owl_web/nightingales/O_lurida/20160203_mbdseq/\n',
)


# In[14]:


# NOTE(review): -n (do not overwrite) and -f (force) are combined here, unlike
# the plain -n used for the *.gz move. On BSD/macOS mv the later flag is
# documented to override the earlier one, so this presumably acts as -f --
# confirm which behavior was intended.
get_ipython().run_cell_magic(
    'bash',
    '',
    '\n'
    'time mv -nf /Volumes/owl_home/mbdseq/checksums.md5 /Volumes/owl_web/nightingales/O_lurida/20160203_mbdseq/\n',
)


# ##### Verify files have been moved - only "seqfiles.txt" should remain (temp work file)

# In[15]:


get_ipython().run_cell_magic('bash', '', '\nls /Volumes/owl_home/mbdseq/\n')


# In[2]:


# cut keeps the 7th space-delimited field onward of each "ls -lh" line.
get_ipython().run_cell_magic(
    'bash',
    '',
    "\n"
    "ls -lh /Volumes/owl_web/nightingales/O_lurida/20160203_mbdseq/ | cut -d ' ' -f7-\n",
)


# In[17]:


get_ipython().run_cell_magic(
    'bash',
    '',
    '\n'
    'ls -1 /Volumes/owl_web/nightingales/O_lurida/20160203_mbdseq/ | wc -l\n',
)


# In[ ]: