# Print the current date and time via a shell escape.
!date
Mon May 16 08:27:10 PDT 2016
%%bash
# Print the system software overview (OS version, kernel version, boot volume, etc.).
system_profiler SPSoftwareDataType
Software: System Software Overview: System Version: Mac OS X 10.7.5 (11G63) Kernel Version: Darwin 11.4.2 Boot Volume: SSD2 Boot Mode: Normal Computer Name: greenbird (2) User Name: Sam (Sam) Secure Virtual Memory: Enabled 64-bit Kernel and Extensions: No Time since boot: 5 days 1:46
%%bash
# Print the hardware overview, dropping every line that contains "S" or "H"
# followed by "e" or "a". This is used to hide the lines that display the
# serial number and hardware UUID.
# The pattern is quoted so the shell hands it to grep verbatim instead of
# glob-expanding it against file names in the working directory.
system_profiler SPHardwareDataType | grep -v '[SH][ea]'
Model Name: Mac Pro Model Identifier: MacPro1,1 Processor Name: Dual-Core Intel Xeon Processor Speed: 3 GHz Number of Processors: 2 Total Number of Cores: 4 L2 Cache (per Processor): 4 MB Memory: 14 GB Bus Speed: 1.33 GHz Boot ROM Version: MP11.005C.B08 SMC Version (system): 1.7f10
%%bash
# List the files in the 20160512 directory.
ls /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/
151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_1.fq.gz.clean.dup.clean.gz 151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_2.fq.gz.clean.dup.clean.gz 151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_1.fq.gz.clean.dup.clean.gz 151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_2.fq.gz.clean.dup.clean.gz 151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_1.fq.gz.clean.dup.clean.gz 151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_2.fq.gz.clean.dup.clean.gz 20160512_F15FTSUSAT0327_genome_survey.pdf Ostrea_lurida.GC_content_vs_depth.png Ostrea_lurida.scafSeq README md5.txt
%%bash
# Generate an MD5 checksum for each .gz file and append the output
# to the checksums.md5 file in the same directory.
# "time" reports how long the whole loop takes.
time for file in /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/*.gz
do
  # When the glob matches nothing, bash leaves the literal pattern in $file;
  # skip it rather than handing a non-existent path to md5.
  [[ -e "$file" ]] || continue
  md5 "$file" >> /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/checksums.md5
done
real 43m39.240s user 1m32.906s sys 2m27.982s
%%bash
# Running total of reads across all files.
totalreads=0
# Illumina sequencing files are composed of four lines per read, so each
# file's read count is its decompressed line count divided by four.
# For every file, the file name and read count are printed tab-separated;
# "tee -a" both prints the line to the screen and appends it to the
# readme.md file. The grand total is printed after the loop completes.
time for file in /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/*.gz
do
  # When the glob matches nothing, bash leaves the literal pattern in $file;
  # skip it instead of recording a bogus zero-read entry.
  [[ -e "$file" ]] || continue
  # pipefail (scoped to the command-substitution subshell) surfaces a gunzip
  # failure, so a corrupt or truncated archive is reported on stderr rather
  # than silently recorded with an undercounted number of reads.
  if ! linecount=$(set -o pipefail; gunzip -c "$file" | wc -l); then
    printf 'warning: could not decompress %s; skipping\n' "$file" >&2
    continue
  fi
  readcount=$((linecount/4))
  totalreads=$((readcount+totalreads))
  printf "%s\t%s\n" "${file##*/}" "$readcount" | tee -a /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/readme.md
done
printf '%s\n' "$totalreads"
151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_1.fq.gz.clean.dup.clean.gz 61253141 151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_2.fq.gz.clean.dup.clean.gz 61253141 151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_1.fq.gz.clean.dup.clean.gz 58755925 151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_2.fq.gz.clean.dup.clean.gz 58755925 151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_1.fq.gz.clean.dup.clean.gz 43938762 151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_2.fq.gz.clean.dup.clean.gz 43938762 327895656
real 59m24.186s user 19m15.868s sys 3m29.110s
%%bash
# Count the lines containing ">" in the Ostrea_lurida.scafSeq file.
# grep -c counts the matching lines itself, so no separate "wc -l" process is needed.
time grep -c ">" /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/Ostrea_lurida.scafSeq
765755
real 1m7.420s user 0m0.919s sys 0m2.288s