BioPerl

Manipulation de séquences

Création d'un objet

In [1]:
use Bio::Seq;

# Build a sequence object from a literal nucleotide string, attaching a
# display id and an accession number to it.
my $seqobj = Bio::Seq->new(
    -seq              => 'ACTGTGTGTCC',
    -id               => 'Chlorella sorokiniana',
    -accession_number => 'CAA41635',
);
Bio::Seq=HASH(0x7f8d1400a738)
In [2]:
use Data::Dumper;
# Render the object's internal data structure as text so it can be inspected.
Dumper($seqobj);
$VAR1 = bless( {
                 'primary_seq' => bless( {
                                           'length' => 11,
                                           'display_id' => 'Chlorella sorokiniana',
                                           'accession_number' => 'CAA41635',
                                           '_root_verbose' => 0,
                                           'seq' => 'ACTGTGTGTCC',
                                           '_nowarnonempty' => undef,
                                           'alphabet' => 'dna'
                                         }, 'Bio::PrimarySeq' ),
                 '_root_verbose' => 0
               }, 'Bio::Seq' );

Méthodes

Méthodes (1)

In [3]:
# Return the sequence string stored in the object.
$seqobj->seq();
ACTGTGTGTCC
In [4]:
# Extract positions 5 through 10 as a string; as the result shows, positions
# are counted from 1 and both ends are included.
$seqobj->subseq(5,10);
TGTGTC
In [7]:
# Return the accession number given at construction.
$seqobj->accession_number();
CAA41635
In [8]:
# Return the alphabet of the sequence. It was not passed to new(), so it was
# presumably inferred from the sequence content -- confirm in the BioPerl docs.
$seqobj->alphabet();
dna
In [9]:
# Ask for the sequence version; none was set, so the cell displays nothing.
$seqobj->version();
In [11]:
# Return the number of residues in the sequence.
$seqobj->length();
11
In [12]:
# Ask for the description; none was set, so the cell displays nothing.
$seqobj->desc();
In [13]:
# Return the primary id. None was set at construction; the value displayed
# is the stringified underlying Bio::PrimarySeq object.
$seqobj->primary_id();
Bio::PrimarySeq=HASH(0x7f8d10eb9e20)
In [14]:
# Return the display id, i.e. the value passed as -id at construction.
$seqobj->display_id();
Chlorella sorokiniana

Méthodes (2)

In [20]:
# Build a new sequence object covering positions 5 through 10 of $seqobj;
# unlike subseq(), the result is an object rather than a string.
$trunc_seq_obj = $seqobj->trunc(5,10);
Bio::Seq=HASH(0x7f8d1400ab88)
In [26]:
# Show the original sequence followed by its truncated copy, one per line.
print $seqobj->seq(), "\n", $trunc_seq_obj->seq(), "\n";
Out[26]:
ACTGTGTGTCC
TGTGTC
1
In [22]:
# Build a new sequence object holding the reverse complement of $seqobj.
$revcom_seq_obj=$seqobj->revcom;
Bio::Seq=HASH(0x7f8d12654130)
In [27]:
# Show the original sequence followed by its reverse complement, one per line.
print $seqobj->seq(), "\n", $revcom_seq_obj->seq(), "\n";
Out[27]:
ACTGTGTGTCC
GGACACACAGT
1
In [28]:
# Translate the nucleotide sequence into a new protein sequence object.
$translated_seq_obj=$seqobj->translate;
Bio::Seq=HASH(0x7f8d1400ab28)
In [29]:
# Show the nucleotide sequence followed by its translation, one per line.
print $seqobj->seq(), "\n", $translated_seq_obj->seq(), "\n";
Out[29]:
ACTGTGTGTCC
TVC
1

Méthodes (3)

In [34]:
# Check a candidate string against the object's alphabet; this string is
# accepted and the call prints 1.
$string = "ATTGCTTT";
print $seqobj->validate_seq($string),"\n";
Out[34]:
1
1
In [35]:
# Same check on a string containing digits; it is rejected and the call
# prints 0.
$string = "TTT53FFF";
print $seqobj->validate_seq($string),"\n";
Out[35]:
0
1

Manipulation de séquences

In [25]:
# Keep the sequence string in $seq for the following cells, then echo it.
$seq = $seqobj->seq();
print $seq, "\n";
Out[25]:
ACTGTGTGTCC
1
In [26]:
# Keep the sequence length in $length for the following cells, then echo it.
$length = $seqobj->length();
print $length, "\n";
Out[26]:
11
1
In [27]:
# Take the tail of the sequence: from the midpoint (length / 2, rounded down)
# through the last residue.
$subseq = $seqobj->subseq(int($length / 2), $length);
print $subseq, "\n";
Out[27]:
TGTGTCC
1
In [28]:
# Append the extracted fragment to the original string and store the result
# back in the object, but only when it is still a valid sequence.
$new_seq = $seq . $subseq;
$seqobj->seq($new_seq) if $seqobj->validate_seq($new_seq);

# Report the (possibly updated) sequence and its length.
print join(' ', $seqobj->seq(), $seqobj->length()), "\n";
Out[28]:
ACTGTGTGTCCTGTGTCC 18
1

Traduction

In [41]:
# Translate the sequence when it is DNA. Otherwise fall back to the sequence
# object itself (not the plain string $seq), so that the ->seq() call below
# always has an object to work on.
$translated_obj = $seqobj;

# alphabet() returns a string, so it must be compared with `eq`. The numeric
# `==` converts both operands to 0 and is therefore true for any alphabet.
if ($seqobj->alphabet() eq 'dna') {
    $translated_obj = $seqobj->translate();
}
print $translated_obj->seq(), "\n";
Out[41]:
TVCPVS
1

Récupération de statistiques sur une séquence

Création

In [36]:
use Bio::Tools::SeqStats;
# Create a statistics helper bound to the sequence object.
$seq_stats  =  Bio::Tools::SeqStats->new(-seq => $seqobj);
Bio::Tools::SeqStats=HASH(0x7f8d15079050)

Méthodes

In [39]:
# Count the occurrences of each residue; the result is a hash reference
# keyed by residue letter.
$hash_ref = $seq_stats->count_monomers();
Dumper($hash_ref);
$VAR1 = {
          'A' => 1,
          'T' => 4,
          'C' => 3,
          'G' => 3
        };

In [40]:
# Count codons, this time calling the method on the class and passing the
# sequence object explicitly; the result is a hash reference keyed by codon.
$hash_ref = Bio::Tools::SeqStats->count_codons($seqobj);
Dumper($hash_ref);
$VAR1 = {
          'ACT' => 1,
          'GTG' => 1,
          'TGT' => 1
        };

In [42]:
# Compute the molecular weight. The result is a reference to a two-element
# array -- presumably a lower and an upper bound; confirm in the SeqStats docs.
$weight = Bio::Tools::SeqStats->get_mol_wt($seqobj);
Dumper($weight);
$VAR1 = [
          '3403.5',
          '3403.5'
        ];

In [43]:
# hydropathicity() only applies to protein sequences: called on this DNA
# object it throws the exception shown below.
Bio::Tools::SeqStats->hydropathicity($seqobj);

------------- EXCEPTION -------------
MSG: hydropathicity only meaningful for protein, not for dna sequences.
STACK Bio::Tools::SeqStats::hydropathicity /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Bio/Tools/SeqStats.pm:745
STACK Eval::Closure::Sandbox_627::__ANON__ reply input:1
STACK Reply::Plugin::Defaults::execute /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply/Plugin/Defaults.pm:72
STACK Reply::_wrapped_plugin /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply.pm:219
STACK Reply::__ANON__ /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply.pm:217
STACK Reply::Plugin::IPerl::__ANON__ /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply/Plugin/IPerl.pm:28
STACK (eval) /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Capture/Tiny.pm:369
STACK Capture::Tiny::_capture_tee /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Capture/Tiny.pm:369
STACK Reply::Plugin::IPerl::execute /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply/Plugin/IPerl.pm:29
STACK Reply::_wrapped_plugin /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply.pm:219
STACK Reply::_eval /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply.pm:176
STACK Reply::try {...}  /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply.pm:68
STACK (eval) /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Try/Tiny.pm:81
STACK Try::Tiny::try /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Try/Tiny.pm:72
STACK Reply::step /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Reply.pm:73
STACK Devel::IPerl::Kernel::Backend::Reply::__ANON__ /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Devel/IPerl/Kernel/Backend/Reply.pm:48
STACK (eval) /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Capture/Tiny.pm:369
STACK Capture::Tiny::_capture_tee /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Capture/Tiny.pm:369
STACK Devel::IPerl::Kernel::Backend::Reply::run_line /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Devel/IPerl/Kernel/Backend/Reply.pm:49
STACK Devel::IPerl::Kernel::Callback::REPL::execute /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Devel/IPerl/Kernel/Callback/REPL.pm:42
STACK Devel::IPerl::Kernel::Callback::REPL::execute (eval 55):6
STACK Devel::IPerl::Kernel::Callback::REPL::msg_execute_request /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Devel/IPerl/Kernel/Callback/REPL.pm:155
STACK Devel::IPerl::Kernel::route_message /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Devel/IPerl/Kernel.pm:223
STACK Devel::IPerl::Kernel::__ANON__ /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Devel/IPerl/Kernel.pm:195
STACK IO::Async::Loop::Poll::post_poll /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/IO/Async/Loop/Poll.pm:172
STACK IO::Async::Loop::Poll::loop_once /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/IO/Async/Loop/Poll.pm:270
STACK IO::Async::Loop::run /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/IO/Async/Loop.pm:524
STACK IO::Async::Loop::loop_forever /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/IO/Async/Loop.pm:561
STACK Devel::IPerl::Kernel::run /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Devel/IPerl/Kernel.pm:203
STACK Devel::IPerl::main /Users/bbatut/perl5/perlbrew/perls/perl-5.16.0/lib/site_perl/5.16.0/Devel/IPerl.pm:14
STACK toplevel -e:1
-------------------------------------

Manipulation de fichiers de séquences

In [46]:
use Bio::SeqIO;
# Create a FASTA stream on sequence.fasta; the leading '>' in the file name
# requests write mode.
my $seqio_obj = Bio::SeqIO->new(-file => '>sequence.fasta', 
    -format => 'fasta' );
Bio::SeqIO::fasta=HASH(0x7f8d1402ffd8)
In [47]:
# Render the stream object's internal structure as text for inspection.
Dumper($seqio_obj );

Écrire des séquences dans un fichier

In [2]:
use Bio::SeqIO;
use Bio::Seq;

# Output stream: the leading '>' in the file name opens sequence.fasta for
# writing, and records are formatted as FASTA.
my $seqio_obj = Bio::SeqIO->new(
    -file   => '>sequence.fasta',
    -format => 'fasta',
);

# First record.
# NOTE: the id contains whitespace, which the FASTA writer reports with a
# "No whitespace allowed in FASTA ID" warning when the record is written.
my $seqobj = Bio::Seq->new(
    -seq => "ACTGTGTGTCC",
    -id  => "Chlorella sorokiniana",
);

$seqio_obj->write_seq($seqobj);

# Second record. $seqobj is reassigned rather than declared again: a second
# `my $seqobj` in the same scope masks the first declaration and triggers a
# Perl warning.
$seqobj = Bio::Seq->new(
    -seq => "ACTGTGTGTCCTGTGTCC",
    -id  => "Modified Chlorella sorokiniana",
);

$seqio_obj->write_seq($seqobj);
1

--------------------- WARNING ---------------------
MSG: No whitespace allowed in FASTA ID [Chlorella sorokiniana]
---------------------------------------------------


--------------------- WARNING ---------------------
MSG: No whitespace allowed in FASTA ID [Chlorella sorokiniana]
---------------------------------------------------


--------------------- WARNING ---------------------
MSG: No whitespace allowed in FASTA ID [Modified Chlorella sorokiniana]
---------------------------------------------------


--------------------- WARNING ---------------------
MSG: No whitespace allowed in FASTA ID [Modified Chlorella sorokiniana]
---------------------------------------------------

Lecture de séquences d'un fichier

In [3]:
use Bio::SeqIO;

# Open sequence.fasta for reading (no '>' prefix this time) and print the
# residues of every record, one sequence per line.
$seqio_obj = Bio::SeqIO->new(
    -file   => 'sequence.fasta',
    -format => 'fasta',
);

while ($seq_obj = $seqio_obj->next_seq()) {
    print $seq_obj->seq(), "\n";
}
Out[3]:
ACTGTGTGTCC
ACTGTGTGTCCTGTGTCC

Accès aux bases de données

In [44]:
use Bio::DB::GenBank;
# Create a handle for retrieving records from GenBank.
$db_obj = Bio::DB::GenBank->new;
Bio::DB::GenBank=HASH(0x7f8d15079998)
In [45]:
# Render the database handle's internal structure as text for inspection.
Dumper($db_obj);

Récupération d'une séquence dans une base de données

In [6]:
use Bio::DB::GenBank;
use Bio::Seq;

# Handle used to retrieve records from GenBank.
$db_obj = Bio::DB::GenBank->new;
 
# Fetch a single record by its identifier (here the id 2).
# NOTE(review): presumably performs a network request -- confirm.
$seq_obj = $db_obj->get_Seq_by_id(2);

# Show the display id of the retrieved record.
print $seq_obj->display_id(),"\n";
Out[6]:
A00002
1

Récupération de plusieurs séquences

In [8]:
use Bio::DB::GenBank;
use Bio::DB::Query::GenBank;
 
# Search expression: organism, title keyword and a sequence-length range.
# NOTE(review): the second operator is a lowercase `and`; check that the
# query service treats it the same as the uppercase `AND` used before it.
$query = "Arabidopsis[ORGN] AND topoisomerase[TITL] and 0:3000[SLEN]";
# Wrap the expression in a query object targeting the 'nucleotide' database.
$query_obj = Bio::DB::Query::GenBank->new(-db    => 'nucleotide',  -query => $query );
 
$gb_obj = Bio::DB::GenBank->new;
 
# Obtain a stream over all records matching the query.
$stream_obj = $gb_obj->get_Stream_by_query($query_obj);
 
# Print the display id and the length of each record, tab-separated.
while ($seq_obj = $stream_obj->next_seq) {     
    print $seq_obj->display_id, "\t", $seq_obj->length, "\n";
}
Out[8]:
NM_001203615	2099
NM_001125591	549
NM_128760	2775
NM_120360	1655
NM_001036386	2734
NM_112969	2212
NM_126949	704
ATHTOP2A	410
XM_002881164	2698
AF323679	1373
DQ446392	1152
AY142667	2140
AY133820	2113
AY090993	1830
AY080864	928
AY039947	2493
AY034972	2586
U12285	591
U12284	590
AK221851	1078
AK221766	1452
AK221174	2785
AJ297843	2212
AJ297842	1655
AJ251990	1655
AJ251989	1640
AK175923	2744
AK175789	2744

Parser des rapports de recherche

Parcours d'un fichier issu d'une requête Blast

In [48]:
use Bio::SearchIO;

# Open the BLAST report for parsing. The constructor is called with the
# arrow form; the indirect-object form (`new Class(...)`) is ambiguous to
# the parser and discouraged.
my $in = Bio::SearchIO->new(
    -format => "blast",
    -file   => "files/report.bls",
);

# Walk the report: one result per query, one or more hits per result, and
# one or more HSPs per hit. Print a one-line summary for each HSP.
while (my $result = $in->next_result) {
    while (my $hit = $result->next_hit) {
        while (my $hsp = $hit->next_hsp) {
            print "Query=", $result->query_name,
                " Hit=", $hit->name,
                " Length=", $hsp->length('total'),
                " Percent_id=", $hsp->percent_identity,
                "\n";
        }
    }
}
Out[48]:
Query=gi|20521485|dbj|AP004641.2 Hit=gb|443893|124775 Length=52 Percent_id=88.4615384615385
In [ ]: