# Sample identifier handed to get_sequences_from_fasta as its first argument.
TARA_SAMPLE_ID = "TARA_R110002003"

# REQUEST_PARAMS lists the subsequences to extract, one tuple per request:
#     (sequence_id, start_index, stop_index[, sequence_type])
# sequence_type accepts "raw", "complement" or "reverse_complement"; it is
# optional and defaults to "raw" when omitted.
# NOTE(review): the scaffold3_4 entry (379, 379) has start_index equal to
# stop_index, which presumably selects an empty range -- confirm the intended
# coordinates.
REQUEST_PARAMS = [
    ("TARA_R110002003_G_scaffold3_1", 3290, 6293),
    ("TARA_R110002003_G_scaffold3_3", 0, 327),
    ("TARA_R110002003_G_scaffold3_3", 944, 2742),
    ("TARA_R110002003_G_scaffold3_4", 379, 379),
    ("TARA_R110002003_G_scaffold3_4", 1530, 1669),
]
#@title Double click to see the cell of the Python program
from oceania import get_sequences_from_fasta
# Submit the extraction request for the configured sample and subsequences.
# get_sequences_from_fasta hands back a pandas.DataFrame holding the
# extracted sequences, which is then printed.
request_result = get_sequences_from_fasta(TARA_SAMPLE_ID, REQUEST_PARAMS)
print(request_result)
[30-06-2021 07:58:00] Sending request for fasta sequences
[30-06-2021 07:58:01] Request accepted
[30-06-2021 07:58:01] Waiting for results...
[30-06-2021 07:59:38] Done. Elapsed time: 98.09430822399736 seconds
Result loaded into a DataFrame
                                  id  start  end                type  \
0     TARA_A100000171_G_scaffold48_1     10   50          complement
1     TARA_A100000171_G_scaffold48_1     10   50                 raw
2     TARA_A100000171_G_scaffold48_1     10   50  reverse_complement
3    TARA_A100000171_G_scaffold181_1      0   50                 raw
4    TARA_A100000171_G_scaffold181_1    100  200                 raw
5    TARA_A100000171_G_scaffold181_1    200  230                 raw
6    TARA_A100000171_G_scaffold493_2     54   76                 raw
7  TARA_A100000171_G_scaffold50396_2     87  105                 raw
8       TARA_A100000171_G_C2001995_1     20  635                 raw
9       TARA_A100000171_G_C2026460_1      0  100                 raw

                                            sequence
0           ACCGTAACGTAGGCCATATTATTTTCATGGTCTTCCACAA
1           TGGCATTGCATCCGGTATAATAAAAGTACCAGAAGGTGTT
2           AACACCTTCTGGTACTTTTATTATACCGGATGCAATGCCA
3  CCAAGACCAAGCAATTTTAACACCACACTTAGATACTGCGCAAACA...
4  ATTATGTTACCAGCACTTGATAACCAAAAAGTTTGGGcaggattaa...
5                     ATCAAACTGATGCTACTAACTCAGAAGCAT
6                             TAAGTTTTTATTATTATATTTT
7                                 AGCTGTTCGGAAAACTAG
8  ACAGCACACCAAGCAGGTCGTCGACCGAAACGATATTGAGAAGAAT...
9  AATTTGAAACAACCCTAAAGTGTTTACCATAATAGGTTCTTAAATC...