#!/usr/bin/env bash
#
# Build nucleotide BLAST databases from the Anaplasma, Cowdria and Ehrlichia
# GenBank FASTA files, then run BLASTN of the de novo assembly of all abalone
# sequences against each of those databases.
#
# Environment overrides (both optional):
#   BLAST_HOME   root directory holding dbs/, queries/ and outputs/
#   NUM_THREADS  value passed to blastn -num_threads (default 16)

set -euo pipefail

readonly BLAST_HOME="${BLAST_HOME:-/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+}"
readonly DB_DIR="${BLAST_HOME}/dbs"
readonly OUT_DIR="${BLAST_HOME}/outputs"
readonly QUERY_FASTA="${BLAST_HOME}/queries/AllAbDenovo7118contigs.fa"

# Suffix shared by every source FASTA / database name (e.g. AnaplasmaGBnt20140305).
readonly DB_SUFFIX="GBnt20140305"

# Genera processed, in the order the databases are built and searched.
readonly -a GENERA=(Anaplasma Cowdria Ehrlichia)

# Author's note: 16 = the number of CPUs listed in the resource monitor
# multiplied by four, since the processor is listed as a quad core.
readonly NUM_THREADS="${NUM_THREADS:-16}"

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Count the sequences in a FASTA file by counting header lines, since each
# sequence entry is preceded by a '>'.
# Arguments: $1 - path to the FASTA file
# Outputs:   the count on stdout (0 when there are no header lines)
#######################################
count_fasta_records() {
  local fasta=$1
  [[ -r "${fasta}" ]] || die "cannot read FASTA file: ${fasta}"
  # Anchor on '^>' so only header lines are counted; '+ 0' prints 0 rather
  # than an empty line when nothing matched.
  awk '/^>/ { n++ } END { print n + 0 }' "${fasta}"
}

#######################################
# Build a nucleotide BLAST database for one genus.
# Arguments: $1 - genus name (prefix of the FASTA / database file name)
# Outputs:   makeblastdb output plus timing information
#######################################
make_nucl_db() {
  local genus=$1
  local db_prefix="${DB_DIR}/${genus}${DB_SUFFIX}"
  # "time" is NOT part of the BLAST package; it is provided by the shell and
  # is only here to report how long the step took.
  time makeblastdb \
    -dbtype nucl \
    -in "${db_prefix}.fasta" \
    -out "${db_prefix}"
}

#######################################
# BLASTN the abalone de novo contigs against one genus database.
#
# All BLASTs are performed in the same fashion with these options:
#   -task blastn            use BLASTn instead of the default MegaBLAST
#   -outfmt "6 stitle std"  output format 6 (a tab-delimited output file) with
#                           the subject title (stitle) added to the standard
#                           (std) BLASTn output columns
#   -max_target_seqs 3      match a maximum of 3 database sequences per query
#                           sequence (the default is 500!)
#   -num_threads N          number of processing threads to use
#
# Arguments: $1 - genus name (selects the database and the output file name)
# Outputs:   writes the hit table under ${OUT_DIR}; timing info to the terminal
#######################################
blastn_against() {
  local genus=$1
  # -db takes the database path prefix that makeblastdb wrote via -out; it
  # must always be followed by a value.
  time blastn \
    -db "${DB_DIR}/${genus}${DB_SUFFIX}" \
    -task blastn \
    -query "${QUERY_FASTA}" \
    -outfmt "6 stitle std" \
    -max_target_seqs 3 \
    -num_threads "${NUM_THREADS}" \
    -out "${OUT_DIR}/AllAbDenovo7118contigs${genus}GBntBLASTN.txt"
}

main() {
  local genus

  # Check each source FASTA for its number of sequences, then build its database.
  for genus in "${GENERA[@]}"; do
    count_fasta_records "${DB_DIR}/${genus}${DB_SUFFIX}.fasta"
    make_nucl_db "${genus}"
  done

  [[ -r "${QUERY_FASTA}" ]] || die "cannot read query file: ${QUERY_FASTA}"

  # Search the assembly against each database in turn.
  for genus in "${GENERA[@]}"; do
    blastn_against "${genus}"
  done
}

main "$@"