#@title Double click to see the cell of the Python program
# Identifier of the TARA sample whose sequences are queried.
TARA_SAMPLE_ID = "TARA_A100000171"

# REQUEST_PARAMS lists the subsequences to extract, one tuple per request:
#
#     (sequence_id, start_index, stop_index[, sequence_type])
#
# sequence_type is optional and must be one of "raw", "complement" or
# "reverse_complement"; when omitted it defaults to "raw".
# In the recorded output of this notebook each returned sequence contains
# stop_index - start_index characters.
REQUEST_PARAMS = [
    # scaffold48_1: the same 10-50 window requested in all three sequence types
    ("TARA_A100000171_G_scaffold48_1", 10, 50, "complement"),
    ("TARA_A100000171_G_scaffold48_1", 10, 50),
    ("TARA_A100000171_G_scaffold48_1", 10, 50, "reverse_complement"),
    # scaffold181_1: three separate windows, default sequence type
    ("TARA_A100000171_G_scaffold181_1", 0, 50),
    ("TARA_A100000171_G_scaffold181_1", 100, 200),
    ("TARA_A100000171_G_scaffold181_1", 200, 230),
    # one window from each of the remaining sequences
    ("TARA_A100000171_G_scaffold493_2", 54, 76),
    ("TARA_A100000171_G_scaffold50396_2", 87, 105),
    ("TARA_A100000171_G_C2001995_1", 20, 635),
    ("TARA_A100000171_G_C2026460_1", 0, 100),
]
#@title Double click to see the cell of the Python program
from oceania import get_sequences_from_fasta
# Ask the Oceania service for every subsequence listed in REQUEST_PARAMS.
# The extracted sequences come back as a pandas.DataFrame; in the run
# recorded below the call took about 98 seconds to complete.
request_result = get_sequences_from_fasta(TARA_SAMPLE_ID, REQUEST_PARAMS)

# Show the extracted sequences.
print(request_result)
[30-06-2021 07:58:00] Sending request for fasta sequences [30-06-2021 07:58:01] Request accepted [30-06-2021 07:58:01] Waiting for results... [30-06-2021 07:59:38] Done. Elapsed time: 98.09430822399736 seconds
Result loaded into a DataFrame id start end type \ 0 TARA_A100000171_G_scaffold48_1 10 50 complement 1 TARA_A100000171_G_scaffold48_1 10 50 raw 2 TARA_A100000171_G_scaffold48_1 10 50 reverse_complement 3 TARA_A100000171_G_scaffold181_1 0 50 raw 4 TARA_A100000171_G_scaffold181_1 100 200 raw 5 TARA_A100000171_G_scaffold181_1 200 230 raw 6 TARA_A100000171_G_scaffold493_2 54 76 raw 7 TARA_A100000171_G_scaffold50396_2 87 105 raw 8 TARA_A100000171_G_C2001995_1 20 635 raw 9 TARA_A100000171_G_C2026460_1 0 100 raw sequence 0 ACCGTAACGTAGGCCATATTATTTTCATGGTCTTCCACAA 1 TGGCATTGCATCCGGTATAATAAAAGTACCAGAAGGTGTT 2 AACACCTTCTGGTACTTTTATTATACCGGATGCAATGCCA 3 CCAAGACCAAGCAATTTTAACACCACACTTAGATACTGCGCAAACA... 4 ATTATGTTACCAGCACTTGATAACCAAAAAGTTTGGGcaggattaa... 5 ATCAAACTGATGCTACTAACTCAGAAGCAT 6 TAAGTTTTTATTATTATATTTT 7 AGCTGTTCGGAAAACTAG 8 ACAGCACACCAAGCAGGTCGTCGACCGAAACGATATTGAGAAGAAT... 9 AATTTGAAACAACCCTAAAGTGTTTACCATAATAGGTTCTTAAATC...