Archiving of Ostrea lurida (Olympia oyster) GBS data from BGI

In [1]:
# Print the current date and time (IPython shell escape).
!date
Mon Mar 14 16:36:33 PDT 2016
In [2]:
%%bash
# Print the macOS system software overview (OS version, kernel version,
# computer name, user, time since boot) for the machine running this notebook.
system_profiler SPSoftwareDataType
Software:

    System Software Overview:

      System Version: OS X 10.9.5 (13F34)
      Kernel Version: Darwin 13.4.0
      Boot Volume: Hummingbird
      Boot Mode: Normal
      Computer Name: hummingbird
      User Name: Sam (Sam)
      Secure Virtual Memory: Enabled
      Time since boot: 115 days 1:27

List files provided by BGI
In [3]:
%%bash
# List everything in the directory holding the files provided by BGI.
ls /Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/
1HL_10A_1.fq.gz
1HL_10A_2.fq.gz
1HL_11A_1.fq.gz
1HL_11A_2.fq.gz
1HL_12A_1.fq.gz
1HL_12A_2.fq.gz
1HL_13A_1.fq.gz
1HL_13A_2.fq.gz
1HL_14A_1.fq.gz
1HL_14A_2.fq.gz
1HL_15A_1.fq.gz
1HL_15A_2.fq.gz
1HL_16A_1.fq.gz
1HL_16A_2.fq.gz
1HL_17A_1.fq.gz
1HL_17A_2.fq.gz
1HL_19A_1.fq.gz
1HL_19A_2.fq.gz
1HL_1A_1.fq.gz
1HL_1A_2.fq.gz
1HL_20A_1.fq.gz
1HL_20A_2.fq.gz
1HL_21A_1.fq.gz
1HL_21A_2.fq.gz
1HL_22A_1.fq.gz
1HL_22A_2.fq.gz
1HL_23A_1.fq.gz
1HL_23A_2.fq.gz
1HL_24A_1.fq.gz
1HL_24A_2.fq.gz
1HL_25A_1.fq.gz
1HL_25A_2.fq.gz
1HL_26A_1.fq.gz
1HL_26A_2.fq.gz
1HL_27A_1.fq.gz
1HL_27A_2.fq.gz
1HL_28A_1.fq.gz
1HL_28A_2.fq.gz
1HL_29A_1.fq.gz
1HL_29A_2.fq.gz
1HL_2A_1.fq.gz
1HL_2A_2.fq.gz
1HL_31A_1.fq.gz
1HL_31A_2.fq.gz
1HL_33A_1.fq.gz
1HL_33A_2.fq.gz
1HL_34A_1.fq.gz
1HL_34A_2.fq.gz
1HL_35A_1.fq.gz
1HL_35A_2.fq.gz
1HL_3A_1.fq.gz
1HL_3A_2.fq.gz
1HL_4A_1.fq.gz
1HL_4A_2.fq.gz
1HL_5A_1.fq.gz
1HL_5A_2.fq.gz
1HL_6A_1.fq.gz
1HL_6A_2.fq.gz
1HL_7A_1.fq.gz
1HL_7A_2.fq.gz
1HL_8A_1.fq.gz
1HL_8A_2.fq.gz
1HL_9A_1.fq.gz
1HL_9A_2.fq.gz
1NF_10A_1.fq.gz
1NF_10A_2.fq.gz
1NF_11A_1.fq.gz
1NF_11A_2.fq.gz
1NF_12A_1.fq.gz
1NF_12A_2.fq.gz
1NF_13A_1.fq.gz
1NF_13A_2.fq.gz
1NF_14A_1.fq.gz
1NF_14A_2.fq.gz
1NF_15A_1.fq.gz
1NF_15A_2.fq.gz
1NF_16A_1.fq.gz
1NF_16A_2.fq.gz
1NF_17A_1.fq.gz
1NF_17A_2.fq.gz
1NF_18A_1.fq.gz
1NF_18A_2.fq.gz
1NF_19A_1.fq.gz
1NF_19A_2.fq.gz
1NF_1A_1.fq.gz
1NF_1A_2.fq.gz
1NF_20A_1.fq.gz
1NF_20A_2.fq.gz
1NF_21A_1.fq.gz
1NF_21A_2.fq.gz
1NF_22A_1.fq.gz
1NF_22A_2.fq.gz
1NF_23A_1.fq.gz
1NF_23A_2.fq.gz
1NF_24A_1.fq.gz
1NF_24A_2.fq.gz
1NF_25A_1.fq.gz
1NF_25A_2.fq.gz
1NF_26A_1.fq.gz
1NF_26A_2.fq.gz
1NF_27A_1.fq.gz
1NF_27A_2.fq.gz
1NF_28A_1.fq.gz
1NF_28A_2.fq.gz
1NF_29A_1.fq.gz
1NF_29A_2.fq.gz
1NF_2A_1.fq.gz
1NF_2A_2.fq.gz
1NF_30A_1.fq.gz
1NF_30A_2.fq.gz
1NF_31A_1.fq.gz
1NF_31A_2.fq.gz
1NF_32A_1.fq.gz
1NF_32A_2.fq.gz
1NF_33A_1.fq.gz
1NF_33A_2.fq.gz
1NF_4A_1.fq.gz
1NF_4A_2.fq.gz
1NF_5A_1.fq.gz
1NF_5A_2.fq.gz
1NF_6A_1.fq.gz
1NF_6A_2.fq.gz
1NF_7A_1.fq.gz
1NF_7A_2.fq.gz
1NF_8A_1.fq.gz
1NF_8A_2.fq.gz
1NF_9A_1.fq.gz
1NF_9A_2.fq.gz
1SN_10A_1.fq.gz
1SN_10A_2.fq.gz
1SN_11A_1.fq.gz
1SN_11A_2.fq.gz
1SN_12A_1.fq.gz
1SN_12A_2.fq.gz
1SN_13A_1.fq.gz
1SN_13A_2.fq.gz
1SN_14A_1.fq.gz
1SN_14A_2.fq.gz
1SN_15A_1.fq.gz
1SN_15A_2.fq.gz
1SN_16A_1.fq.gz
1SN_16A_2.fq.gz
1SN_17A_1.fq.gz
1SN_17A_2.fq.gz
1SN_18A_1.fq.gz
1SN_18A_2.fq.gz
1SN_19A_1.fq.gz
1SN_19A_2.fq.gz
1SN_1A_1.fq.gz
1SN_1A_2.fq.gz
1SN_20A_1.fq.gz
1SN_20A_2.fq.gz
1SN_21A_1.fq.gz
1SN_21A_2.fq.gz
1SN_22A_1.fq.gz
1SN_22A_2.fq.gz
1SN_23A_1.fq.gz
1SN_23A_2.fq.gz
1SN_24A_1.fq.gz
1SN_24A_2.fq.gz
1SN_25A_1.fq.gz
1SN_25A_2.fq.gz
1SN_26A_1.fq.gz
1SN_26A_2.fq.gz
1SN_27A_1.fq.gz
1SN_27A_2.fq.gz
1SN_28A_1.fq.gz
1SN_28A_2.fq.gz
1SN_29A_1.fq.gz
1SN_29A_2.fq.gz
1SN_2A_1.fq.gz
1SN_2A_2.fq.gz
1SN_30A_1.fq.gz
1SN_30A_2.fq.gz
1SN_31A_1.fq.gz
1SN_31A_2.fq.gz
1SN_32A_1.fq.gz
1SN_32A_2.fq.gz
1SN_3A_1.fq.gz
1SN_3A_2.fq.gz
1SN_4A_1.fq.gz
1SN_4A_2.fq.gz
1SN_5A_1.fq.gz
1SN_5A_2.fq.gz
1SN_6A_1.fq.gz
1SN_6A_2.fq.gz
1SN_7A_1.fq.gz
1SN_7A_2.fq.gz
1SN_8A_1.fq.gz
1SN_8A_2.fq.gz
1SN_9A_1.fq.gz
1SN_9A_2.fq.gz
UnKnow_1.fq.gz
UnKnow_2.fq.gz
Upload.tar.gz
md5.check
md5.txt
Count the number of FASTQ files
In [4]:
%%bash
# Count the FASTQ files by expanding the glob into an array rather than
# parsing ls output (which miscounts filenames containing newlines).
# nullglob makes an unmatched pattern expand to nothing, so a directory with
# no FASTQ files reports 0 without an ls error message.
shopt -s nullglob
fastq_files=(/Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/*.fq.gz)
printf '%s\n' "${#fastq_files[@]}"
     194
Generate md5 checksums file
In [8]:
%%bash

# Compute an md5 checksum (hash value) for every FASTQ file and append the
# results to checksums.md5 in the same directory.
data_dir=/Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG

# The output file is opened once, in append mode, for the whole loop instead
# of once per file. "time" still reports its timings to the terminal.
time for file in "$data_dir"/*.fq.gz; do
    # An unmatched glob is left as the literal pattern; skip it instead of
    # handing a nonexistent path to md5.
    [[ -e "$file" ]] || continue

    # Stop at the first failure so an incomplete checksum list is not
    # mistaken for a complete one.
    md5 "$file" || {
        printf 'md5 failed for %s; checksums.md5 is incomplete\n' "$file" >&2
        exit 1
    }
done >> "$data_dir/checksums.md5"
real	14m6.296s
user	1m33.226s
sys	0m27.272s
Calculate total number of reads generated by this project
In [9]:
%%bash

#######################################
# Print the number of reads in one gzipped FASTQ file.
# Arguments: $1 - path to a .fq.gz file
# Outputs:   read count (line count divided by four) on stdout
# Returns:   non-zero if the file cannot be decompressed
#######################################
count_reads() {
    local lines
    # pipefail is enabled only inside this command substitution so that a
    # gunzip failure is not hidden by the successful exit status of wc.
    lines=$(set -o pipefail; gunzip -c "$1" | wc -l) || return 1
    # Illumina sequencing files are composed of four lines per read.
    printf '%d\n' "$((lines / 4))"
}

# Running total of reads across all FASTQ files.
totalreads=0

# Sum the per-file read counts; the total is printed after the loop completes.
time for file in /Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/*.fq.gz; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "$file" ]] || continue

    # Abort rather than report a total that silently omits a broken file.
    readcount=$(count_reads "$file") || {
        printf 'could not count reads in %s\n' "$file" >&2
        exit 1
    }
    totalreads=$((totalreads + readcount))
done
printf '%s\n' "$totalreads"
557596520
real	31m30.449s
user	16m21.149s
sys	1m54.328s
Calculate number of reads per file and append filename and corresponding number of reads to readme file
In [10]:
%%bash

#######################################
# Print the number of reads in one gzipped FASTQ file.
# Arguments: $1 - path to a .fq.gz file
# Outputs:   read count (line count divided by four) on stdout
# Returns:   non-zero if the file cannot be decompressed
#######################################
count_reads() {
    local lines
    # pipefail is enabled only inside this command substitution so that a
    # gunzip failure is not hidden by the successful exit status of wc.
    lines=$(set -o pipefail; gunzip -c "$1" | wc -l) || return 1
    # Illumina sequencing files are composed of four lines per read.
    printf '%d\n' "$((lines / 4))"
}

readme=/Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/readme.md

# For each FASTQ file, print the filename, a tab, and the read count,
# followed by a blank line. "tee -a" both shows the line on screen and
# appends it to the readme.md file.
time for file in /Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/*.fq.gz; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "$file" ]] || continue

    # Abort rather than write a wrong count for a broken file into the readme.
    readcount=$(count_reads "$file") || {
        printf 'could not count reads in %s\n' "$file" >&2
        exit 1
    }
    printf '%s\t%s\n\n' "${file##*/}" "$readcount" | tee -a "$readme"
done
1HL_10A_1.fq.gz	3186565

1HL_10A_2.fq.gz	3186565

1HL_11A_1.fq.gz	3568503

1HL_11A_2.fq.gz	3568503

1HL_12A_1.fq.gz	2524813

1HL_12A_2.fq.gz	2524813

1HL_13A_1.fq.gz	2676425

1HL_13A_2.fq.gz	2676425

1HL_14A_1.fq.gz	2015611

1HL_14A_2.fq.gz	2015611

1HL_15A_1.fq.gz	2196324

1HL_15A_2.fq.gz	2196324

1HL_16A_1.fq.gz	2333158

1HL_16A_2.fq.gz	2333158

1HL_17A_1.fq.gz	3653761

1HL_17A_2.fq.gz	3653761

1HL_19A_1.fq.gz	3617984

1HL_19A_2.fq.gz	3617984

1HL_1A_1.fq.gz	2260430

1HL_1A_2.fq.gz	2260430

1HL_20A_1.fq.gz	2908938

1HL_20A_2.fq.gz	2908938

1HL_21A_1.fq.gz	2591845

1HL_21A_2.fq.gz	2591845

1HL_22A_1.fq.gz	2074229

1HL_22A_2.fq.gz	2074229

1HL_23A_1.fq.gz	3609794

1HL_23A_2.fq.gz	3609794

1HL_24A_1.fq.gz	2721808

1HL_24A_2.fq.gz	2721808

1HL_25A_1.fq.gz	2957874

1HL_25A_2.fq.gz	2957874

1HL_26A_1.fq.gz	3142369

1HL_26A_2.fq.gz	3142369

1HL_27A_1.fq.gz	3199649

1HL_27A_2.fq.gz	3199649

1HL_28A_1.fq.gz	3770238

1HL_28A_2.fq.gz	3770238

1HL_29A_1.fq.gz	3071205

1HL_29A_2.fq.gz	3071205

1HL_2A_1.fq.gz	2758150

1HL_2A_2.fq.gz	2758150

1HL_31A_1.fq.gz	2453753

1HL_31A_2.fq.gz	2453753

1HL_33A_1.fq.gz	2708943

1HL_33A_2.fq.gz	2708943

1HL_34A_1.fq.gz	2899019

1HL_34A_2.fq.gz	2899019

1HL_35A_1.fq.gz	2630313

1HL_35A_2.fq.gz	2630313

1HL_3A_1.fq.gz	2211401

1HL_3A_2.fq.gz	2211401

1HL_4A_1.fq.gz	2571031

1HL_4A_2.fq.gz	2571031

1HL_5A_1.fq.gz	1980666

1HL_5A_2.fq.gz	1980666

1HL_6A_1.fq.gz	2070051

1HL_6A_2.fq.gz	2070051

1HL_7A_1.fq.gz	2793544

1HL_7A_2.fq.gz	2793544

1HL_8A_1.fq.gz	2544013

1HL_8A_2.fq.gz	2544013

1HL_9A_1.fq.gz	2832945

1HL_9A_2.fq.gz	2832945

1NF_10A_1.fq.gz	2149301

1NF_10A_2.fq.gz	2149301

1NF_11A_1.fq.gz	2266647

1NF_11A_2.fq.gz	2266647

1NF_12A_1.fq.gz	2179923

1NF_12A_2.fq.gz	2179923

1NF_13A_1.fq.gz	3890118

1NF_13A_2.fq.gz	3890118

1NF_14A_1.fq.gz	2719198

1NF_14A_2.fq.gz	2719198

1NF_15A_1.fq.gz	2554324

1NF_15A_2.fq.gz	2554324

1NF_16A_1.fq.gz	2682948

1NF_16A_2.fq.gz	2682948

1NF_17A_1.fq.gz	3119328

1NF_17A_2.fq.gz	3119328

1NF_18A_1.fq.gz	1750070

1NF_18A_2.fq.gz	1750070

1NF_19A_1.fq.gz	2036640

1NF_19A_2.fq.gz	2036640

1NF_1A_1.fq.gz	1355760

1NF_1A_2.fq.gz	1355760

1NF_20A_1.fq.gz	2457026

1NF_20A_2.fq.gz	2457026

1NF_21A_1.fq.gz	3046937

1NF_21A_2.fq.gz	3046937

1NF_22A_1.fq.gz	2895176

1NF_22A_2.fq.gz	2895176

1NF_23A_1.fq.gz	3206742

1NF_23A_2.fq.gz	3206742

1NF_24A_1.fq.gz	3038626

1NF_24A_2.fq.gz	3038626

1NF_25A_1.fq.gz	2203229

1NF_25A_2.fq.gz	2203229

1NF_26A_1.fq.gz	2934794

1NF_26A_2.fq.gz	2934794

1NF_27A_1.fq.gz	2272652

1NF_27A_2.fq.gz	2272652

1NF_28A_1.fq.gz	2344763

1NF_28A_2.fq.gz	2344763

1NF_29A_1.fq.gz	3829981

1NF_29A_2.fq.gz	3829981

1NF_2A_1.fq.gz	3452931

1NF_2A_2.fq.gz	3452931

1NF_30A_1.fq.gz	2701047

1NF_30A_2.fq.gz	2701047

1NF_31A_1.fq.gz	3561574

1NF_31A_2.fq.gz	3561574

1NF_32A_1.fq.gz	3477986

1NF_32A_2.fq.gz	3477986

1NF_33A_1.fq.gz	2602951

1NF_33A_2.fq.gz	2602951

1NF_4A_1.fq.gz	3582899

1NF_4A_2.fq.gz	3582899

1NF_5A_1.fq.gz	2765984

1NF_5A_2.fq.gz	2765984

1NF_6A_1.fq.gz	2423888

1NF_6A_2.fq.gz	2423888

1NF_7A_1.fq.gz	2590507

1NF_7A_2.fq.gz	2590507

1NF_8A_1.fq.gz	2772580

1NF_8A_2.fq.gz	2772580

1NF_9A_1.fq.gz	3380498

1NF_9A_2.fq.gz	3380498

1SN_10A_1.fq.gz	2558667

1SN_10A_2.fq.gz	2558667

1SN_11A_1.fq.gz	2583992

1SN_11A_2.fq.gz	2583992

1SN_12A_1.fq.gz	3423902

1SN_12A_2.fq.gz	3423902

1SN_13A_1.fq.gz	2956991

1SN_13A_2.fq.gz	2956991

1SN_14A_1.fq.gz	3544290

1SN_14A_2.fq.gz	3544290

1SN_15A_1.fq.gz	2506325

1SN_15A_2.fq.gz	2506325

1SN_16A_1.fq.gz	2616966

1SN_16A_2.fq.gz	2616966

1SN_17A_1.fq.gz	2983401

1SN_17A_2.fq.gz	2983401

1SN_18A_1.fq.gz	3248512

1SN_18A_2.fq.gz	3248512

1SN_19A_1.fq.gz	3036463

1SN_19A_2.fq.gz	3036463

1SN_1A_1.fq.gz	3040537

1SN_1A_2.fq.gz	3040537

1SN_20A_1.fq.gz	2155177

1SN_20A_2.fq.gz	2155177

1SN_21A_1.fq.gz	3540618

1SN_21A_2.fq.gz	3540618

1SN_22A_1.fq.gz	3060365

1SN_22A_2.fq.gz	3060365

1SN_23A_1.fq.gz	3696257

1SN_23A_2.fq.gz	3696257

1SN_24A_1.fq.gz	1885265

1SN_24A_2.fq.gz	1885265

1SN_25A_1.fq.gz	3037089

1SN_25A_2.fq.gz	3037089

1SN_26A_1.fq.gz	2758873

1SN_26A_2.fq.gz	2758873

1SN_27A_1.fq.gz	2607809

1SN_27A_2.fq.gz	2607809

1SN_28A_1.fq.gz	2841587

1SN_28A_2.fq.gz	2841587

1SN_29A_1.fq.gz	2257274

1SN_29A_2.fq.gz	2257274

1SN_2A_1.fq.gz	3080107

1SN_2A_2.fq.gz	3080107

1SN_30A_1.fq.gz	3840203

1SN_30A_2.fq.gz	3840203

1SN_31A_1.fq.gz	3353715

1SN_31A_2.fq.gz	3353715

1SN_32A_1.fq.gz	2552635

1SN_32A_2.fq.gz	2552635

1SN_3A_1.fq.gz	2442489

1SN_3A_2.fq.gz	2442489

1SN_4A_1.fq.gz	3294871

1SN_4A_2.fq.gz	3294871

1SN_5A_1.fq.gz	3000852

1SN_5A_2.fq.gz	3000852

1SN_6A_1.fq.gz	3354310

1SN_6A_2.fq.gz	3354310

1SN_7A_1.fq.gz	2964594

1SN_7A_2.fq.gz	2964594

1SN_8A_1.fq.gz	3269810

1SN_8A_2.fq.gz	3269810

1SN_9A_1.fq.gz	3177205

1SN_9A_2.fq.gz	3177205

UnKnow_1.fq.gz	7344729

UnKnow_2.fq.gz	7344729

real	35m56.559s
user	16m21.347s
sys	1m55.365s