#!/usr/bin/env python
# coding: utf-8

# Script export of a Jupyter notebook. Each "# In[n]:" marker below is one
# notebook cell and each "# ####" comment is a markdown heading from the
# notebook. get_ipython() is only available when this file is executed by
# IPython/Jupyter; the shell commands use macOS tools (system_profiler, md5).

# In[1]:

# Record when the notebook was run.
get_ipython().system("date")


# In[2]:

# Record the operating system details of the machine.
get_ipython().run_cell_magic(
    "bash",
    "",
    "system_profiler SPSoftwareDataType\n",
)


# In[6]:

# Record the hardware details of the machine without its identifiers.
get_ipython().run_cell_magic(
    "bash",
    "",
    r"""
#Uses grep to exclude lines that display serial number and hardware UUID.
#The pattern is quoted so the shell cannot glob-expand it, and it names the
#fields explicitly so unrelated lines (e.g. the "Hardware:" heading) are kept.
#"Provisioning UDID" is included as a precaution; it is harmless if absent.
system_profiler SPHardwareDataType | grep -v -E "Serial Number|Hardware UUID|Provisioning UDID"
""",
)


# #### List files provided by BGI

# In[3]:

get_ipython().run_cell_magic(
    "bash",
    "",
    "ls /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/\n",
)


# #### Create checksums file

# In[5]:

get_ipython().run_cell_magic(
    "bash",
    "",
    r"""
#Make the loop run zero times (instead of once on the literal pattern)
#when no .gz files are present.
shopt -s nullglob

data_dir="/Volumes/web/O_lurida_genome_assemblies_BGI/20160512"

#For loop generates a md5 checksum hash value for each file
#and appends the output to the checksums.md5 file.
#NOTE: the output is appended (>>), so re-running this cell adds duplicate entries.
time for file in "$data_dir"/*.gz
  do
    md5 "$file" >> "$data_dir"/checksums.md5
  done
""",
)


# #### Calculate total number of reads generated by this project.

# #### Calculate number of reads per file, append filename and corresponding number of reads to readme file.

# In[7]:

get_ipython().run_cell_magic(
    "bash",
    "",
    r"""
#Make the loop run zero times (instead of once on the literal pattern)
#when no .gz files are present.
shopt -s nullglob

#Make a gunzip failure visible in the exit status of the "gunzip | wc" pipeline,
#so a corrupt file is reported instead of being recorded with a wrong read count.
set -o pipefail

data_dir="/Volumes/web/O_lurida_genome_assemblies_BGI/20160512"

#Initializes variable.
totalreads=0

#For loop counts the lines in each file and divides them by four. This is performed because
#Illumina sequencing files are composed of four lines per read.
#A running total of the total number of reads is generated [totalreads=$((readcount+totalreads))]
#and is printed after the for loop completes.

#Format the output (printf) to print the filename, followed by a tab, followed by the readcount.
#The command "tee -a" is used to both print the output to the screen and append the output to the readme.md file.
time for file in "$data_dir"/*.gz
  do
    if ! linecount=$(gunzip -c "$file" | wc -l)
    then
      echo "WARNING: could not decompress $file; skipping" >&2
      continue
    fi
    readcount=$((linecount/4))
    totalreads=$((readcount+totalreads))
    printf "%s\t%s\n" "${file##*/}" "$readcount" | tee -a "$data_dir"/readme.md
  done
echo "$totalreads"
""",
)


# #### Count the number of sequences in the scafSeq (FASTA format) file

# In[8]:

get_ipython().run_cell_magic(
    "bash",
    "",
    r"""
#FASTA header lines start with ">", so counting lines that begin with ">"
#gives the number of sequences. grep -c prints the count directly; note that
#grep exits non-zero when the count is 0 or the file cannot be read.
time grep -c "^>" /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/Ostrea_lurida.scafSeq
""",
)


# In[ ]: