from os import chdir, mkdir
from os.path import join
#the following are only available in the current development branch of IPython
from IPython.display import FileLinks, FileLink
# Project label; it matches the prefix of the mapping and output file names used
# below (the variable itself is not referenced in the cells visible here).
project_name = "Sofie_Kate_Hyrum"
# Absolute path to the mapping file that is validated in the next command.
mapping_file = "/Users/Jenna/Dropbox/Projects/Seagrasses/Sofie_Kate_Hyrum/Sofie_Kate_Hyrum_mapping.txt"
# Validate the mapping file; IPython substitutes $mapping_file with the value of
# the Python variable before handing the command to the shell.
!validate_mapping_file.py -m $mapping_file
No errors or warnings were found in mapping file.
# Overwrite the working-directory mapping file with the *_corrected.txt copy
# (presumably written by validate_mapping_file.py -- confirm), then list the
# directory to see which input files are available.
!mv Sofie_Kate_Hyrum_mapping_corrected.txt Sofie_Kate_Hyrum_mapping.txt
!ls
SG104_plus_NoIndex_L001_R1_001.fastq.gz SG104_plus_NoIndex_L001_R2_005.fastq.gz SG104_plus_NoIndex_L001_R4_004.fastq.gz SG104_plus_NoIndex_L001_R1_002.fastq.gz SG104_plus_NoIndex_L001_R3_001.fastq.gz SG104_plus_NoIndex_L001_R4_005.fastq.gz SG104_plus_NoIndex_L001_R1_003.fastq.gz SG104_plus_NoIndex_L001_R3_002.fastq.gz Sofie_Kate_Hyrum.ipynb SG104_plus_NoIndex_L001_R1_004.fastq.gz SG104_plus_NoIndex_L001_R3_003.fastq.gz Sofie_Kate_Hyrum_mapping.html SG104_plus_NoIndex_L001_R1_005.fastq.gz SG104_plus_NoIndex_L001_R3_004.fastq.gz Sofie_Kate_Hyrum_mapping.log SG104_plus_NoIndex_L001_R2_001.fastq.gz SG104_plus_NoIndex_L001_R3_005.fastq.gz Sofie_Kate_Hyrum_mapping.txt SG104_plus_NoIndex_L001_R2_002.fastq.gz SG104_plus_NoIndex_L001_R4_001.fastq.gz overlib.js SG104_plus_NoIndex_L001_R2_003.fastq.gz SG104_plus_NoIndex_L001_R4_002.fastq.gz SG104_plus_NoIndex_L001_R2_004.fastq.gz SG104_plus_NoIndex_L001_R4_003.fastq.gz
# Build one barcode file per file chunk (_001 .. _005): R2 is passed as -f and R3
# as -r, with an 8-base barcode taken from each (barcode_paired_end).
# Every input carries the same L001 tag, so the "LaneN" output directories are
# named after the chunk number rather than a distinct sequencing lane.
!extract_barcodes.py -f SG104_plus_NoIndex_L001_R2_001.fastq.gz -r SG104_plus_NoIndex_L001_R3_001.fastq.gz -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane1
!extract_barcodes.py -f SG104_plus_NoIndex_L001_R2_002.fastq.gz -r SG104_plus_NoIndex_L001_R3_002.fastq.gz -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane2
!extract_barcodes.py -f SG104_plus_NoIndex_L001_R2_003.fastq.gz -r SG104_plus_NoIndex_L001_R3_003.fastq.gz -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane3
!extract_barcodes.py -f SG104_plus_NoIndex_L001_R2_004.fastq.gz -r SG104_plus_NoIndex_L001_R3_004.fastq.gz -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane4
!extract_barcodes.py -f SG104_plus_NoIndex_L001_R2_005.fastq.gz -r SG104_plus_NoIndex_L001_R3_005.fastq.gz -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane5
# First join attempt for chunk 001 (R1 as forward, R4 as reverse, Lane1 barcodes).
# The traceback that follows shows it aborting because fastq-join was not found.
!join_paired_ends.py -f SG104_plus_NoIndex_L001_R1_001.fastq.gz -r SG104_plus_NoIndex_L001_R4_001.fastq.gz -b Lane1/barcodes.fastq -o Lane1_joined
Traceback (most recent call last): File "/macqiime/QIIME/bin/join_paired_ends.py", line 168, in <module> main() File "/macqiime/QIIME/bin/join_paired_ends.py", line 144, in main working_dir = output_dir) File "/macqiime/lib/python2.7/site-packages/qiime/pycogent_backports/fastq_join.py", line 179, in join_paired_end_reads_fastqjoin HALT_EXEC=HALT_EXEC) File "/macqiime/lib/python2.7/site-packages/cogent/app/util.py", line 165, in __init__ self._error_on_missing_application(params) File "/macqiime/lib/python2.7/site-packages/cogent/app/util.py", line 434, in _error_on_missing_application % command cogent.app.util.ApplicationNotFoundError: Cannot find fastq-join. Is it installed? Is it in your path?
# Second join attempt on the uncompressed .fastq inputs. The traceback that
# follows shows the barcode-syncing step failing; its message points at
# read/barcode headers that are not identical.
!join_paired_ends.py -f SG104_plus_NoIndex_L001_R1_001.fastq -r SG104_plus_NoIndex_L001_R4_001.fastq -b Lane1/barcodes.fastq -o Lane1_joined
Traceback (most recent call last): File "/macqiime/QIIME/bin/join_paired_ends.py", line 168, in <module> main() File "/macqiime/QIIME/bin/join_paired_ends.py", line 164, in main write_synced_barcodes_fastq(assembly_fp,index_reads) File "/macqiime/lib/python2.7/site-packages/qiime/join_paired_ends.py", line 68, in write_synced_barcodes_fastq " paired-end ID processed was:\n\'%s\'\n" %(joined_label) StopIteration: Reached end of index-reads file before iterating through joined paired-end-reads file! Except for missing paired-end reads that did not survive assembly, your index and paired-end reads files must be in the same order! Also, check that the index-reads and paired-end reads have identical headers. The last joined paired-end ID processed was: 'HWI-M02034:34:000000000-A7U20:1:1101:15638:1444 1:N:0:'
#to do this fix, these need to be unzipped, otherwise, they can be left alone
!gunzip SG104_plus_NoIndex_L001_R1_001.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R1_002.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R1_003.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R1_004.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R1_005.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R2_001.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R2_002.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R2_003.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R2_004.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R2_005.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R3_001.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R3_002.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R3_003.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R3_004.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R3_005.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R4_001.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R4_002.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R4_003.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R4_004.fastq.gz
!gunzip SG104_plus_NoIndex_L001_R4_005.fastq.gz
# Make read and barcode headers identical so join_paired_ends.py can keep them
# in sync: remove the " 1:N:0:" tag from R1, " 4:N:0:" from R4 and " 2:N:0:" from
# the extracted barcodes, writing *_fixed.fastq copies next to the originals.
# The sed expression has no line address, so the substitution is applied to
# every line of each file, not only to header lines.
!sed 's/ 1:N:0://g' SG104_plus_NoIndex_L001_R1_001.fastq > SG104_plus_NoIndex_L001_R1_001_fixed.fastq
!sed 's/ 4:N:0://g' SG104_plus_NoIndex_L001_R4_001.fastq > SG104_plus_NoIndex_L001_R4_001_fixed.fastq
!sed 's/ 2:N:0://g' Lane1/barcodes.fastq > Lane1/barcodes_fixed.fastq
!sed 's/ 1:N:0://g' SG104_plus_NoIndex_L001_R1_002.fastq > SG104_plus_NoIndex_L001_R1_002_fixed.fastq
!sed 's/ 4:N:0://g' SG104_plus_NoIndex_L001_R4_002.fastq > SG104_plus_NoIndex_L001_R4_002_fixed.fastq
!sed 's/ 2:N:0://g' Lane2/barcodes.fastq > Lane2/barcodes_fixed.fastq
!sed 's/ 1:N:0://g' SG104_plus_NoIndex_L001_R1_003.fastq > SG104_plus_NoIndex_L001_R1_003_fixed.fastq
!sed 's/ 4:N:0://g' SG104_plus_NoIndex_L001_R4_003.fastq > SG104_plus_NoIndex_L001_R4_003_fixed.fastq
!sed 's/ 2:N:0://g' Lane3/barcodes.fastq > Lane3/barcodes_fixed.fastq
!sed 's/ 1:N:0://g' SG104_plus_NoIndex_L001_R1_004.fastq > SG104_plus_NoIndex_L001_R1_004_fixed.fastq
!sed 's/ 4:N:0://g' SG104_plus_NoIndex_L001_R4_004.fastq > SG104_plus_NoIndex_L001_R4_004_fixed.fastq
!sed 's/ 2:N:0://g' Lane4/barcodes.fastq > Lane4/barcodes_fixed.fastq
!sed 's/ 1:N:0://g' SG104_plus_NoIndex_L001_R1_005.fastq > SG104_plus_NoIndex_L001_R1_005_fixed.fastq
!sed 's/ 4:N:0://g' SG104_plus_NoIndex_L001_R4_005.fastq > SG104_plus_NoIndex_L001_R4_005_fixed.fastq
!sed 's/ 2:N:0://g' Lane5/barcodes.fastq > Lane5/barcodes_fixed.fastq
^C
# Join R1 (forward) and R4 (reverse) for every chunk using the header-stripped
# files; -b passes the matching barcode file so it is synced to the joined reads.
!join_paired_ends.py -f SG104_plus_NoIndex_L001_R1_001_fixed.fastq -r SG104_plus_NoIndex_L001_R4_001_fixed.fastq -b Lane1/barcodes_fixed.fastq -o Lane1_joined
!join_paired_ends.py -f SG104_plus_NoIndex_L001_R1_002_fixed.fastq -r SG104_plus_NoIndex_L001_R4_002_fixed.fastq -b Lane2/barcodes_fixed.fastq -o Lane2_joined
!join_paired_ends.py -f SG104_plus_NoIndex_L001_R1_003_fixed.fastq -r SG104_plus_NoIndex_L001_R4_003_fixed.fastq -b Lane3/barcodes_fixed.fastq -o Lane3_joined
!join_paired_ends.py -f SG104_plus_NoIndex_L001_R1_004_fixed.fastq -r SG104_plus_NoIndex_L001_R4_004_fixed.fastq -b Lane4/barcodes_fixed.fastq -o Lane4_joined
!join_paired_ends.py -f SG104_plus_NoIndex_L001_R1_005_fixed.fastq -r SG104_plus_NoIndex_L001_R4_005_fixed.fastq -b Lane5/barcodes_fixed.fastq -o Lane5_joined
# Concatenate the per-chunk joined reads and their barcode files into one file
# each; both commands use the same Lane*_joined glob, so the chunk order matches.
!cat Lane*_joined/fastqjoin.join.fastq> Sofie_Kate_Hyrum_concat.fastq
!cat Lane*_joined/fastqjoin.join_barcodes.fastq > Sofie_Kate_Hyrum_barcodes.fastq
!split_libraries_fastq.py -i Sofie_Kate_Hyrum_concat.fastq -o Demultiplexed_Files -m Sofie_Kate_Hyrum_mapping.txt -b Sofie_kate_Hyrum_barcodes.fastq --retain_unassigned_reads --barcode_type 8
Traceback (most recent call last): File "/macqiime/QIIME/bin/split_libraries_fastq.py", line 360, in <module> main() File "/macqiime/QIIME/bin/split_libraries_fastq.py", line 272, in main None) File "/macqiime/lib/python2.7/site-packages/qiime/split_libraries.py", line 287, in check_map 'please run check_id_map.py to identify problems.') ValueError: Errors were found with mapping file, please run check_id_map.py to identify problems.
# Second demultiplexing attempt on the concatenated files, now with 16-base
# barcodes and up to 2 barcode errors allowed. The traceback that follows
# reports that barcode and read headers do not match.
!split_libraries_fastq.py -i Sofie_Kate_Hyrum_concat.fastq -o Demultiplexed_Files -m Sofie_Kate_Hyrum_mapping.txt -b Sofie_Kate_Hyrum_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
Traceback (most recent call last): File "/macqiime/QIIME/bin/split_libraries_fastq.py", line 360, in <module> main() File "/macqiime/QIIME/bin/split_libraries_fastq.py", line 339, in main for fasta_header, sequence, quality, seq_id in seq_generator: File "/macqiime/lib/python2.7/site-packages/qiime/split_libraries_fastq.py", line 293, in process_fastq_single_end_read_file ("Headers of barcode and read do not match. Can't continue. " qiime.split_libraries_fastq.FastqParseError: Headers of barcode and read do not match. Can't continue. Confirm that the barcode fastq and read fastq that you are passing match one another.
# Demultiplex the chunk-001 join output on its own (reads and synced barcodes
# from Lane1_joined) instead of the concatenated files.
!split_libraries_fastq.py -i Lane1_joined/fastqjoin.join.fastq -o Demultiplexed_Lane1 -m Sofie_Kate_Hyrum_mapping.txt -b Lane1_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
# Variant for chunk 001 with the second barcode reverse-complemented
# (--rev_comp_bc2); every output of this variant goes to a *_RC directory.
!extract_barcodes.py -f SG104_plus_NoIndex_L001_R2_001.fastq -r SG104_plus_NoIndex_L001_R3_001.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane1_RC --rev_comp_bc2
# Regenerate the header-stripped R1/R4 files and strip the tag from the new barcodes.
!sed 's/ 1:N:0://g' SG104_plus_NoIndex_L001_R1_001.fastq > SG104_plus_NoIndex_L001_R1_001_fixed.fastq
!sed 's/ 4:N:0://g' SG104_plus_NoIndex_L001_R4_001.fastq > SG104_plus_NoIndex_L001_R4_001_fixed.fastq
!sed 's/ 2:N:0://g' Lane1_RC/barcodes.fastq > Lane1_RC/barcodes_fixed.fastq
# Join the reads against the reverse-complemented barcodes, then demultiplex.
!join_paired_ends.py -f SG104_plus_NoIndex_L001_R1_001_fixed.fastq -r SG104_plus_NoIndex_L001_R4_001_fixed.fastq -b Lane1_RC/barcodes_fixed.fastq -o Lane1_RC_joined
!split_libraries_fastq.py -i Lane1_RC_joined/fastqjoin.join.fastq -o Demultiplexed_Lane1_RC -m Sofie_Kate_Hyrum_mapping.txt -b Lane1_RC_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
# Re-validate the mapping file in the working directory; the output line that
# follows reports errors and/or warnings.
!validate_mapping_file.py -m Sofie_Kate_Hyrum_mapping.txt
Errors and/or warnings detected in mapping file. Please check the log and html file for details.
# Swap in the *_corrected.txt mapping file and rerun the reverse-complement
# demultiplexing for chunk 001 with it (same command and output directory as before).
!mv Sofie_Kate_Hyrum_mapping_corrected.txt Sofie_Kate_Hyrum_mapping.txt
!split_libraries_fastq.py -i Lane1_RC_joined/fastqjoin.join.fastq -o Demultiplexed_Lane1_RC -m Sofie_Kate_Hyrum_mapping.txt -b Lane1_RC_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
# "switchorder" variant for chunk 001: the forward/reverse roles are swapped,
# i.e. R3 is passed as -f and R2 as -r when extracting barcodes, and R4 as -f
# and R1 as -r when joining.
!extract_barcodes.py -r SG104_plus_NoIndex_L001_R2_001.fastq -f SG104_plus_NoIndex_L001_R3_001.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane1_switchorder
# The header-stripped R1/R4 *_fixed.fastq files written earlier are reused, so
# those two sed steps are not repeated here.
# NOTE(review): the join below fails (see the traceback that follows) and the
# retry strips " 3:N:0:" instead -- presumably the barcode headers now come from
# the R3 file; confirm.
!sed 's/ 2:N:0://g' Lane1_switchorder/barcodes.fastq > Lane1_switchorder/barcodes_fixed.fastq
!join_paired_ends.py -r SG104_plus_NoIndex_L001_R1_001_fixed.fastq -f SG104_plus_NoIndex_L001_R4_001_fixed.fastq -b Lane1_switchorder/barcodes_fixed.fastq -o Lane1_switchorder_joined
Traceback (most recent call last): File "/macqiime/QIIME/bin/join_paired_ends.py", line 168, in <module> main() File "/macqiime/QIIME/bin/join_paired_ends.py", line 164, in main write_synced_barcodes_fastq(assembly_fp,index_reads) File "/macqiime/lib/python2.7/site-packages/qiime/join_paired_ends.py", line 68, in write_synced_barcodes_fastq " paired-end ID processed was:\n\'%s\'\n" %(joined_label) StopIteration: Reached end of index-reads file before iterating through joined paired-end-reads file! Except for missing paired-end reads that did not survive assembly, your index and paired-end reads files must be in the same order! Also, check that the index-reads and paired-end reads have identical headers. The last joined paired-end ID processed was: 'HWI-M02034:34:000000000-A7U20:1:1101:15638:1444'
# Retry for chunk 001: strip the " 3:N:0:" tag from the switchorder barcodes
# (overwriting barcodes_fixed.fastq), then join with R4 as -f / R1 as -r and
# demultiplex the result.
!sed 's/ 3:N:0://g' Lane1_switchorder/barcodes.fastq > Lane1_switchorder/barcodes_fixed.fastq
!join_paired_ends.py -r SG104_plus_NoIndex_L001_R1_001_fixed.fastq -f SG104_plus_NoIndex_L001_R4_001_fixed.fastq -b Lane1_switchorder/barcodes_fixed.fastq -o Lane1_switchorder_joined
!split_libraries_fastq.py -i Lane1_switchorder_joined/fastqjoin.join.fastq -o Demultiplexed_Lane1_switchorder -m Sofie_Kate_Hyrum_mapping.txt -b Lane1_switchorder_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
# Same switchorder pipeline for chunk 002: extract barcodes with R3 as -f and
# R2 as -r, strip the " 3:N:0:" tag, join with R4 as -f / R1 as -r, demultiplex.
!extract_barcodes.py -r SG104_plus_NoIndex_L001_R2_002.fastq -f SG104_plus_NoIndex_L001_R3_002.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane2_switchorder
# The R1/R4 _002_fixed.fastq files used by the join below are the ones written
# by the earlier header-stripping step; they are not regenerated here.
!sed 's/ 3:N:0://g' Lane2_switchorder/barcodes.fastq > Lane2_switchorder/barcodes_fixed.fastq
!join_paired_ends.py -r SG104_plus_NoIndex_L001_R1_002_fixed.fastq -f SG104_plus_NoIndex_L001_R4_002_fixed.fastq -b Lane2_switchorder/barcodes_fixed.fastq -o Lane2_switchorder_joined
!split_libraries_fastq.py -i Lane2_switchorder_joined/fastqjoin.join.fastq -o Demultiplexed_Lane2_switchorder -m Sofie_Kate_Hyrum_mapping.txt -b Lane2_switchorder_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
# Switchorder pipeline for chunks 003-005, run stage by stage.
# Stage 1: extract paired 8-base barcodes with R3 as -f and R2 as -r.
!extract_barcodes.py -r SG104_plus_NoIndex_L001_R2_003.fastq -f SG104_plus_NoIndex_L001_R3_003.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane3_switchorder
!extract_barcodes.py -r SG104_plus_NoIndex_L001_R2_004.fastq -f SG104_plus_NoIndex_L001_R3_004.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane4_switchorder
!extract_barcodes.py -r SG104_plus_NoIndex_L001_R2_005.fastq -f SG104_plus_NoIndex_L001_R3_005.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane5_switchorder
# Stage 2: strip the " 3:N:0:" tag from the barcode files.
!sed 's/ 3:N:0://g' Lane3_switchorder/barcodes.fastq > Lane3_switchorder/barcodes_fixed.fastq
!sed 's/ 3:N:0://g' Lane4_switchorder/barcodes.fastq > Lane4_switchorder/barcodes_fixed.fastq
!sed 's/ 3:N:0://g' Lane5_switchorder/barcodes.fastq > Lane5_switchorder/barcodes_fixed.fastq
# Stage 3: join with R4 as -f and R1 as -r, reusing the earlier *_fixed.fastq reads.
!join_paired_ends.py -r SG104_plus_NoIndex_L001_R1_003_fixed.fastq -f SG104_plus_NoIndex_L001_R4_003_fixed.fastq -b Lane3_switchorder/barcodes_fixed.fastq -o Lane3_switchorder_joined
!join_paired_ends.py -r SG104_plus_NoIndex_L001_R1_004_fixed.fastq -f SG104_plus_NoIndex_L001_R4_004_fixed.fastq -b Lane4_switchorder/barcodes_fixed.fastq -o Lane4_switchorder_joined
!join_paired_ends.py -r SG104_plus_NoIndex_L001_R1_005_fixed.fastq -f SG104_plus_NoIndex_L001_R4_005_fixed.fastq -b Lane5_switchorder/barcodes_fixed.fastq -o Lane5_switchorder_joined
# Stage 4: demultiplex each chunk with 16-base barcodes, allowing 2 barcode errors.
!split_libraries_fastq.py -i Lane3_switchorder_joined/fastqjoin.join.fastq -o Demultiplexed_Lane3_switchorder -m Sofie_Kate_Hyrum_mapping.txt -b Lane3_switchorder_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
!split_libraries_fastq.py -i Lane4_switchorder_joined/fastqjoin.join.fastq -o Demultiplexed_Lane4_switchorder -m Sofie_Kate_Hyrum_mapping.txt -b Lane4_switchorder_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
!split_libraries_fastq.py -i Lane5_switchorder_joined/fastqjoin.join.fastq -o Demultiplexed_Lane5_switchorder -m Sofie_Kate_Hyrum_mapping.txt -b Lane5_switchorder_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2
import matplotlib
import pylab
import time # Imports system time module to time your script
from pylab import * # Imports numpy, scipy, and matplotlib etc.
from scipy import stats as st
import csv # Imports .csv file reader
close('all')  # close all open figures

# Read in small data from .csv file.
# Directory that holds Compare.csv; defaults to the current directory.
filepath = './'
# On Windows an absolute path to the data directory can be used instead, e.g.
# filepath = 'C:/Dropbox/Towson/Teaching/3_ComputationalEconomics/Lectures/Lecture4/'

# ------------- Load data --------------------
# Read every row of Compare.csv; the context manager closes the file handle
# as soon as the rows have been collected.
with open(filepath + "Compare.csv") as csv_file:
    data = list(csv.reader(csv_file, delimiter=','))

# First column of each row goes on the x axis, second column on the y axis.
# NOTE(review): csv.reader yields strings, so these values are passed to
# matplotlib as text -- confirm whether Compare.csv has a header row before
# converting them to numbers.
QIIME = [row[0] for row in data]
IN_HOUSE = [row[1] for row in data]

# Scatter plot (red circles) of the second column against the first.
pylab.plot(QIIME, IN_HOUSE, 'ro')
xlabel('QIIME')
ylabel('IN_HOUSE')
title('Reads assigned to libraries')
pylab.show()