#!/usr/bin/env python
# coding: utf-8

# ### Archiving of Ostrea lurida (Olympia oyster) GBS data from BGI
#
# Exported Jupyter notebook. It must be run inside an IPython kernel, because
# every cell goes through get_ipython(). The cells list the delivered files,
# write md5 checksums, and record read counts for the gzipped FASTQ files.

# Directory holding the files provided by BGI. Defined once here so that the
# bash cells below do not each repeat the full path.
DATA_DIR = '/Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG'


def _run_bash(script):
    """Run *script* with IPython's ``%%bash`` cell magic.

    A ``data_dir="<DATA_DIR>"`` assignment is prepended to the script, so the
    script can refer to the data directory as ``"$data_dir"``.
    """
    get_ipython().run_cell_magic(
        'bash', '', 'data_dir="{}"\n{}'.format(DATA_DIR, script)
    )


# In[1]:

get_ipython().system('date')


# ##### Print system info

# In[2]:

get_ipython().run_cell_magic('bash', '', 'system_profiler SPSoftwareDataType\n')


# ##### List files provided by BGI

# In[3]:

_run_bash(r'''ls "$data_dir"/
''')


# ##### Count the number of FASTQ files

# In[4]:

_run_bash(r'''ls -1 "$data_dir"/*.fq.gz | wc -l
''')


# ##### Generate md5 checksums file

# In[8]:

# NOTE: the output is appended (>>), so re-running this cell adds a second
# set of entries to checksums.md5.
_run_bash(r'''
#For loop generates an md5 checksum hash value for each file
#and appends the output to the checksums.md5 file.
time for file in "$data_dir"/*.fq.gz
  do
    md5 "$file" >> "$data_dir"/checksums.md5
  done
''')


# ##### Calculate total number of reads generated by this project

# In[9]:

_run_bash(r'''
#Make a pipeline report failure when gunzip fails. Without this, the exit
#status of "gunzip | wc" is that of wc, and a corrupt or unreadable file
#would silently contribute a wrong line count.
set -o pipefail

#Initializes variable.
totalreads=0

#For loop counts the lines in each file and divides them by four. This is performed because
#Illumina sequencing files are composed of four lines per read.
#A running total of the total number of reads is generated [totalreads=$((readcount+totalreads))]
#and is printed after the for loop completes.
#If any file cannot be decompressed the cell stops, because a total that
#is missing files would be misleading.
time for file in "$data_dir"/*.fq.gz
  do
    if ! linecount=$(gunzip -c "$file" | wc -l)
      then
        echo "ERROR: could not decompress $file; total not computed" >&2
        exit 1
    fi
    readcount=$((linecount/4))
    totalreads=$((readcount+totalreads))
done
echo "$totalreads"
''')


# ##### Calculate number of reads per file and append filename and corresponding number of reads to readme file

# In[10]:

# NOTE: "tee -a" appends, so re-running this cell adds the per-file counts to
# readme.md a second time.
_run_bash(r'''
#Make a pipeline report failure when gunzip fails (see the check below).
set -o pipefail

#For loop counts the lines in each file and divides them by four. This is performed because
#Illumina sequencing files are composed of four lines per read.
#Format the output (printf) to print the filename, followed by a tab, followed by the readcount.
#The command "tee -a" is used to both print the output to the screen and append the output to the readme.md file.
#A file that cannot be decompressed is reported on stderr and skipped, so
#that no incorrect count is written to readme.md.
time for file in "$data_dir"/*.fq.gz
  do
    if ! linecount=$(gunzip -c "$file" | wc -l)
      then
        echo "WARNING: could not decompress $file; skipping" >&2
        continue
    fi
    readcount=$((linecount/4))
    printf "%s\t%s\n\n" "${file##*/}" "$readcount" | tee -a "$data_dir"/readme.md
done
''')