# Work from the cnidarian web volume. Use the absolute path so the cell works from any starting directory.
cd /Volumes/web/cnidarian/
/Volumes/web/cnidarian
# Download genesWithDiffMeth.fasta into the current directory (curl -O saves it under the remote file name).
!curl -O http://eagle.fish.washington.edu/bivalvia/array/genesWithDiffMeth.fasta
% Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 244k 100 244k 0 0 1723k 0 --:--:-- --:--:-- --:--:-- 1835k
# Line, word and byte counts of the downloaded FASTA file.
!wc /Volumes/web/cnidarian/genesWithDiffMeth.fasta
166 166 250002 /Volumes/web/cnidarian/genesWithDiffMeth.fasta
# Count the sequences in the FASTA file: one header line (starting with '>') per record.
# Anchoring the pattern keeps a stray '>' inside a description from being counted.
!grep -c "^>" /Volumes/web/cnidarian/genesWithDiffMeth.fasta
83
# Print the blastn usage summary (the output also reports the installed BLAST+ version).
!blastn -h
USAGE blastn [-h] [-help] [-import_search_strategy filename] [-export_search_strategy filename] [-task task_name] [-db database_name] [-dbsize num_letters] [-gilist filename] [-seqidlist filename] [-negative_gilist filename] [-entrez_query entrez_query] [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm] [-subject subject_input_file] [-subject_loc range] [-query input_file] [-out output_file] [-evalue evalue] [-word_size int_value] [-gapopen open_penalty] [-gapextend extend_penalty] [-perc_identity float_value] [-xdrop_ungap float_value] [-xdrop_gap float_value] [-xdrop_gap_final float_value] [-searchsp int_value] [-max_hsps_per_subject int_value] [-penalty penalty] [-reward reward] [-no_greedy] [-min_raw_gapped_score int_value] [-template_type type] [-template_length int_value] [-dust DUST_options] [-filtering_db filtering_database] [-window_masker_taxid window_masker_taxid] [-window_masker_db window_masker_db] [-soft_masking soft_masking] [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value] [-best_hit_score_edge float_value] [-window_size int_value] [-off_diagonal_range int_value] [-use_index boolean] [-index_name string] [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines] [-outfmt format] [-show_gis] [-num_descriptions int_value] [-num_alignments int_value] [-html] [-max_target_seqs num_sequences] [-num_threads int_value] [-remote] [-version] DESCRIPTION Nucleotide-Nucleotide BLAST 2.2.28+ Use '-help' to print detailed descriptions of command line arguments
# blastn of the query genes against the refseqgene database; no -outfmt is given, so the default report format is written.
!blastn -task blastn -query /Volumes/web/cnidarian/genesWithDiffMeth.fasta -db /Volumes/web/whale/blast/db/refseqgene -out /Volumes/web/cnidarian/_MGarray.x.refseqgene -num_threads 2
# Embed the report in the notebook through an iframe.
# NOTE(review): the URL presumably serves the same file that -out wrote above - confirm the volume-to-URL mapping.
from IPython.display import HTML
HTML('<iframe src=http://eagle.fish.washington.edu/cnidarian/_MGarray.x.refseqgene width=700 height=350></iframe>')
# blastn of the query genes against the nt database; no -outfmt is given, so the default report format is written.
!blastn -task blastn -query /Volumes/web/cnidarian/genesWithDiffMeth.fasta -db /Volumes/web/whale/blast/db/nt -out /Volumes/web/cnidarian/_MGarray.x.nt -num_threads 2
# Embed the report in the notebook through an iframe.
# NOTE(review): the URL presumably serves the same file that -out wrote above - confirm the volume-to-URL mapping.
from IPython.display import HTML
HTML('<iframe src=http://eagle.fish.washington.edu/cnidarian/_MGarray.x.nt width=700 height=350></iframe>')
# blastn against nt again, this time with tabular output (-outfmt 6) and -max_target_seqs 1.
# Note: the recorded output still shows several rows for one query (same subject, different
# alignment ranges), so downstream steps must not assume one row per query.
!blastn -task blastn -query /Volumes/web/cnidarian/genesWithDiffMeth.fasta -db /Volumes/web/whale/blast/db/nt -out /Volumes/web/cnidarian/_MGarray.x.nt2 -outfmt 6 -max_target_seqs 1 -num_threads 2
# Preview the first rows of the tabular result.
!head /Volumes/web/cnidarian/_MGarray.x.nt2
CGI_10003380 gi|524888538|ref|XM_005100782.1| 71.46 445 121 4 574 1015 792 1233 2e-52 217 CGI_10004132 gi|524891254|ref|XM_005102106.1| 67.60 392 120 3 16 402 10 399 4e-25 127 CGI_10004132 gi|524891254|ref|XM_005102106.1| 88.89 36 4 0 1594 1629 1729 1764 0.31 48.2 CGI_10004278 gi|542204616|ref|XM_003457422.2| 65.40 1312 400 18 65 1352 99 1380 3e-73 288 CGI_10004344 gi|291232514|ref|XM_002736163.1| 65.36 1045 315 22 94 1116 130 1149 8e-46 196 CGI_10004344 gi|291232514|ref|XM_002736163.1| 73.28 116 31 0 1594 1709 1684 1799 8e-08 69.8 CGI_10004940 gi|524909158|ref|XM_005109573.1| 68.64 1977 560 29 202 2154 346 2286 0.0 693 CGI_10005087 gi|260802603|ref|XM_002596136.1| 68.74 803 247 4 1543 2343 133 933 1e-77 302 CGI_10005249 gi|524889131|ref|XM_005101070.1| 69.92 4402 1227 50 742 5090 1555 5912 0.0 1831 CGI_10005249 gi|524889131|ref|XM_005101070.1| 68.33 3653 1008 58 5254 8815 6094 9688 0.0 1245
# Print the deltablast usage summary, calling the binary by its full path.
!/Users/Shared/Apps/ncbi-blast_28/bin/deltablast -h
USAGE deltablast [-h] [-help] [-import_search_strategy filename] [-export_search_strategy filename] [-db database_name] [-dbsize num_letters] [-gilist filename] [-seqidlist filename] [-negative_gilist filename] [-subject subject_input_file] [-subject_loc range] [-query input_file] [-out output_file] [-evalue evalue] [-word_size int_value] [-gapopen open_penalty] [-gapextend extend_penalty] [-xdrop_ungap float_value] [-xdrop_gap float_value] [-xdrop_gap_final float_value] [-searchsp int_value] [-max_hsps_per_subject int_value] [-seg SEG_options] [-soft_masking soft_masking] [-matrix matrix_name] [-threshold float_value] [-culling_limit int_value] [-best_hit_overhang float_value] [-best_hit_score_edge float_value] [-window_size int_value] [-lcase_masking] [-query_loc range] [-parse_deflines] [-outfmt format] [-show_gis] [-num_descriptions int_value] [-num_alignments int_value] [-html] [-max_target_seqs num_sequences] [-num_threads int_value] [-remote] [-comp_based_stats compo] [-use_sw_tback] [-gap_trigger float_value] [-num_iterations int_value] [-out_pssm checkpoint_file] [-out_ascii_pssm ascii_mtx_file] [-pseudocount pseudocount] [-domain_inclusion_ethresh ethresh] [-inclusion_ethresh ethresh] [-rpsdb database_name] [-show_domain_hits] [-version] DESCRIPTION Domain enhanced lookup time accelarated BLAST 2.2.28+ Use '-help' to print detailed descriptions of command line arguments
# Attempted deltablast run of the query genes, with -db pointing at cdd_delta.
# NOTE(review): the recorded run failed with "No alias or index file found for protein database [cdd_delta]".
# The deltablast usage text lists a separate -rpsdb option; presumably cdd_delta belongs there and -db
# should name a protein sequence database - confirm. Also confirm the query type: this same FASTA file
# is used as a blastn/blastx query elsewhere in this notebook.
!/Users/Shared/Apps/ncbi-blast_28/bin/deltablast -query /Volumes/web/cnidarian/genesWithDiffMeth.fasta -db /Volumes/web/whale/blast/db/cdd_delta -out /Volumes/web/cnidarian/_MGarray.x.cdd_delta -num_threads 14
BLAST Database error: No alias or index file found for protein database [cdd_delta] in search path [/Users/sr320/Dropbox/Steven/ipython_nb::]
# blastx of the query genes against the uniprot_sprot database, tabular output (-outfmt 6), -max_target_seqs 1.
!blastx -query /Volumes/web/cnidarian/genesWithDiffMeth.fasta -db /Volumes/web/whale/blast/db/uniprot_sprot -out /Volumes/web/cnidarian/_MGarray.x.swissprot -outfmt 6 -max_target_seqs 1 -num_threads 14
# Preview the first rows of the tabular result.
!head /Volumes/web/cnidarian/_MGarray.x.swissprot
CGI_10003380 sp|P51650|SSDH_RAT 54.51 488 196 2 211 1668 60 523 0.0 530 CGI_10004132 sp|Q8CHW4|EI2BE_MOUSE 39.73 672 376 11 58 1998 39 706 1e-160 486 CGI_10004278 sp|Q5F3K4|WDR48_CHICK 57.08 678 239 16 55 2007 17 669 0.0 704 CGI_10004344 sp|Q58EN8|VP33B_DANRE 45.57 621 273 10 25 1743 12 615 1e-180 531 CGI_10004940 sp|Q28BL6|AACS_XENTR 61.79 683 242 7 118 2151 4 672 0.0 884 CGI_10005087 sp|Q9VJ79|PDE11_DROME 57.32 717 265 5 376 2412 387 1100 0.0 805 CGI_10005087 sp|Q9VJ79|PDE11_DROME 33.80 213 123 4 841 1437 385 593 3e-27 123 CGI_10005087 sp|Q9VJ79|PDE11_DROME 49.55 111 51 2 52 369 195 305 5e-17 90.5 CGI_10005249 sp|Q9Y4A5|TRRAP_HUMAN 63.96 2775 918 23 721 8847 541 3299 0.0 3579 CGI_10005249 sp|Q9Y4A5|TRRAP_HUMAN 76.72 580 131 1 9046 10773 3280 3859 0.0 894
Next: extract FASTA sequences for the DMR intervals listed in the GFF file.
# Show the last lines of the DMR GFF file to check its layout.
!tail /Volumes/web/bivalvia/array/2013.11.22.mgavery/mgaveryDMRs_112212.gff
scaffold459 MBD_ChIP HYPER 186321 186568 . . . HYPER scaffold59 MBD_ChIP HYPER 225189 225453 . . . HYPER scaffold601 MBD_ChIP HYPER 1116073 1116588 . . . HYPER scaffold733 MBD_ChIP HYPER 26797 27176 . . . HYPER scaffold733 MBD_ChIP HYPER 27741 29238 . . . HYPER scaffold748 MBD_ChIP HYPER 187113 187500 . . . HYPER scaffold759 MBD_ChIP HYPER 29417 29782 . . . HYPER scaffold759 MBD_ChIP HYPER 32132 32777 . . . HYPER scaffold801 MBD_ChIP HYPER 257945 258188 . . . HYPER scaffold82 MBD_ChIP HYPER 242904 243288 . . . HYPER
# Line, word and byte counts of the DMR GFF file.
!wc /Volumes/web/bivalvia/array/2013.11.22.mgavery/mgaveryDMRs_112212.gff
48 441 2561 /Volumes/web/bivalvia/array/2013.11.22.mgavery/mgaveryDMRs_112212.gff
# Extract the sequence of each interval in the DMR GFF from oyster.v9.fa and write them to mgDMRonly.fa.
!fastaFromBed -fi /Volumes/web/cnidarian/oyster.v9.fa -bed /Volumes/web/bivalvia/array/2013.11.22.mgavery/mgaveryDMRs_112212.gff -fo /Volumes/web/cnidarian/mgDMRonly.fa
# Copy the extracted sequences to query.fa inside the mgDMRonly directory.
# NOTE(review): the BLAST run further down reads query.fa from mgDMRonly_mouse, not mgDMRonly - confirm the file is staged there.
!cp /Volumes/web/cnidarian/mgDMRonly.fa /Volumes/web/cnidarian/mgDMRonly/query.fa
# Return to the cnidarian directory.
cd /Volumes/web/cnidarian/
/Volumes/web/cnidarian
# Create the working directory for the BLAST run below (-p: no error if the cell is re-run).
!mkdir -p mgDMRonly_mouse
# Stage the DMR sequences there as query.fa, the file name the blast command reads from the working directory.
!cp /Volumes/web/cnidarian/mgDMRonly.fa /Volumes/web/cnidarian/mgDMRonly_mouse/query.fa
# --- Run configuration for the BLAST and SQLShare steps that follow ---
# Working directory; the notebook changes into it at the end of this block.
wd="/Volumes/web/cnidarian/mgDMRonly_mouse/"
# Directory holding the BLAST databases - keep the trailing '/'.
#dbd="/Volumes/Bay3/Software/ncbi-blast-2.2.29\+/db/"
dbd="/Volumes/Bay3/CLC_blastdatabases/"
# Database name; also used as the prefix of the output file names below.
# NOTE(review): the working directory is named "mouse" and the later SQL join matches UniProt entries,
# while this is set to "nt" - confirm the intended database.
dbn="nt"
# Full path to the BLAST algorithm executable.
# NOTE(review): the '\+' stays literal in the Python string; presumably the shell removes the
# backslash when the path is interpolated into a ! command - confirm.
ba="/Volumes/Bay3/Software/ncbi-blast-2.2.29\+/bin/blastn"
# Location of the SQLShare python tools; may be "" if the tools are on PATH - keep the trailing '/'.
#spd="/Users/Mackenzie/sqlshare-pythonclient/tools/"
spd="/Users/sr320/sqlshare-pythonclient/tools/"
# Change into the working directory.
cd {wd}
/Volumes/web/cnidarian/mgDMRonly_mouse
# Run the configured BLAST executable on query.fa (read from the current directory) against {dbd}{dbn}.
# Tabular output (-outfmt 6), e-value cutoff 1E-10, -max_target_seqs 1; result goes to {dbn}_blast_out.tab.
!{ba} -query query.fa -db {dbd}{dbn} -out {dbn}_blast_out.tab -evalue 1E-10 -task blastn -num_threads 4 -max_target_seqs 1 -outfmt 6
# Show the first result row.
!head -1 {dbn}_blast_out.tab
scaffold39990:18800-19176 gi|289063368|tpg|BK007044.1| 67.65 340 102 5 31 366 376 711 2e-18 102
# Turn every '|' in the BLAST table into a tab so the pipe-delimited subject ID is split into separate columns.
!tr '|' "\t" < {dbn}_blast_out.tab > {dbn}_blast_out2.tab
# Upload the reformatted table to SQLShare as "scratchblast_out". The name is generic and meant
# to be temporary. Warning: an existing dataset with that name will be overwritten.
!python {spd}singleupload.py -d scratchblast_out {dbn}_blast_out2.tab
processing chunk line 0 to 7 (0.000303983688354 s elapsed) pushing uniprot-mouse-reference_blast_out2.tab... parsing 19E9D90A... finished scratchblast_out
# Fetch, as TSV, the uploaded BLAST table left-joined to the UniProt table (blast.Column3 = Entry),
# then to the SPID/GO-number table, then to the GO-to-GOslim table; write the result to {dbn}_join2goslim.txt.
# NOTE(review): presumably Column3 is the third tab-separated field of the uploaded table. For the nt hit
# shown above (gi|289063368|tpg|...) that field would be a GI number rather than a UniProt entry - confirm
# the join key matches the database actually searched.
!python {spd}fetchdata.py -s "SELECT * FROM [sr320@washington.edu].[scratchblast_out]blast Left Join [sr320@washington.edu].[uniprot-reviewed_wGO_010714]unp ON blast.Column3 = unp.Entry Left Join [sr320@washington.edu].[SPID and GO Numbers]go ON unp.Entry = go.SPID Left Join [sr320@washington.edu].[GO_to_GOslim]slim ON slim.GO_id = go.GOID" -f tsv -o {dbn}_join2goslim.txt
# Preview the header and first data row of the joined table.
!head -2 {dbn}_join2goslim.txt