%%perl # central_dogma.pl # Print the Central Dogma of Molecular Biology use strict; use warnings; print "Once information has passed into protein it cannot get out again\n"; # See Crick 1970 %%perl # use 'my' the FIRST time you use a variable my $serine = "TCA"; my $codonLength = 3; print "Codon $serine has $codonLength bases\n"; %%perl my $serine = "TCA"; print "Serine is coded by $serine\n"; $serine = "TCC"; print "Serine is also coded by $serine\n"; %%perl my $age = 18; print "I am $age years old\n"; # two years later $age = $age + 2; print "I will be $age years old two years later\n"; %%perl my $dogma = "Once information gets into protein"; print "1: $dogma\n"; # '.' text concatenation operator $dogma = $dogma . " it cannot get out again"; print "2: $dogma\n"; $dogma = "DNA makes RNA makes Protein"; print "3: $dogma\n"; %%perl my $codon = "ACG"; print "$codon\n"; print '$codon\n'; %%perl my @numbers = (22,103,1,0); my @aminoAcids = ("Serine","Tyrosine","Leucine","Tyrosine","Cysteine"); my $serine = "TCA"; my @mixed = (23,$serine,0.3,"Histone"); %%perl my @aminoAcids = ("Serine","Tyrosine","Leucine","Tyrosine","Cysteine"); print "Amino acids: " , @aminoAcids , "\n"; %%perl my @aminoAcids = ("Serine","Tyrosine","Leucine","Tyrosine","Cysteine"); print "Amino acids, spaced: @aminoAcids\n"; %%perl -w my @aminoAcids = ("Serine","Tyrosine","Leucine","Tyrosine","Cysteine"); print $aminoAcids[2], "\n"; print $aminoAcids[0], "\n"; print $aminoAcids[-1], "\n"; %%perl -w my @aminoAcids = ("Serine","Tyrosine","Leucine","Tyrosine","Cysteine"); $aminoAcids[2] = "Cysteine"; print "@aminoAcids\n"; $aminoAcids[5] = "STOP"; print "@aminoAcids\n"; %%perl -w my @serine = ('T','C','A'); print "@serine\n"; my @cysteine = ('T', 'G', 'T'); print "@cysteine\n"; my @sc = (@serine, @cysteine); print "@sc\n"; print "$sc[4]\n"; %%perl -w my @serine = ('T','C','A'); my $length = @serine; print "$length"; %%perl -w # hash_assignment.pl my %geneticCode = ('TAT' => 'Tyrosine', 'TAC' => 'Tyrosine', 'CAA' => 'Glutamine', 'CAG' => 'Glutamine', ); %%perl -w # hash_assignment.pl my %geneticCode = ('TAT' => 'Tyrosine', 'TAC' => 'Tyrosine', 'CAA' => 'Glutamine', 'CAG' => 'Glutamine', ); print $geneticCode{'TAT'}; print "\n"; print "$geneticCode{'CAG'}\n"; %%perl -w # hash_assignment.pl my %geneticCode = ('TAT' => 'Tyrosine', 'TAC' => 'Tyrosine', 'CAA' => 'Glutamine', 'CAG' => 'Glutamine', ); my @codons = keys %geneticCode; my @aas = values %geneticCode; print "@codons\n"; print "@aas\n"; %%perl -w # hash_assignment.pl my %geneticCode = ('TAT' => 'Tyrosine', 'TAC' => 'Tyrosine', 'CAA' => 'Glutamine', 'CAG' => 'Glutamine', ); $geneticCode{'TGA'} = 'Cysteine'; print "$geneticCode{'TGA'}\n"; $geneticCode{'TGA'} = 'STOP'; # Whoops! Got the assignment wrong the first time print "$geneticCode{'TGA'}\n"; delete $geneticCode{'TGA'}; print "$geneticCode{'TGA'}\n"; %%perl -w # hash_assignment.pl my %geneticCode = ('TAT' => 'Tyrosine', 'TAC' => 'Tyrosine', 'CAA' => 'Glutamine', 'CAG' => 'Glutamine', ); my @sequence = ('CAA','CAG','TAC'); print $geneticCode{$sequence[0]}, "\n"; print $geneticCode{$sequence[1]}, "\n"; print $geneticCode{$sequence[2]}, "\n"; %%perl my %geneticCode = ('TAT' => 'Tyrosine', 'TAC' => 'Tyrosine', 'CAA' => 'Glutamine', 'CAG' => 'Glutamine', ); my @sequence = ('CAA','CAG','TAC'); foreach my $codon (@sequence) { print "$geneticCode{$codon}\n"; } %%perl my %geneExpression = ('Beta-Catenin' => 2.5, 'Beta-Actin' => 1.7, 'Pax6' => 0, 'HoxA2' => -3.2, ); foreach my $gene (keys %geneExpression) { print "$gene: $geneExpression{$gene}\n"; } %%perl my %geneExpression = ('Beta-Catenin' => 2.5, 'Beta-Actin' => 1.7, 'Pax6' => 0, 'HoxA2' => -3.2, ); while (my $gene = each %geneExpression) { print "$gene: $geneExpression{$gene}\n"; } %%perl -w my @sequence = ('CAA','CAG','TAG','CAT','GGT','GAG','GGC','CAG'); my $i = 0; while ($i<5) { print "$sequence[$i] "; $i = $i + 1; } %%perl my %geneExpression = ('Beta-Catenin' => 2.5, 'Beta-Actin' => 1.7, 'Pax6' => 0, 'HoxA2' => -3.2, ); foreach my $gene (keys %geneExpression) { if ($geneExpression{$gene} < 0) { print "$gene is downregulated\n"; } elsif ($geneExpression{$gene} > 0) { print "$gene is upregulated\n"; } else { print "No change in expression of $gene\n"; } } %%perl my @sequence = ('T','A','C','G','G','C','A','T','C','T','A','G'); my $i = 0; foreach my $base (@sequence) { $i = $i + 1; if ($i == 3) { $i = 0; } else { print "$base"; } } %%perl my @sequence = ('T','A','C','G','G','C','A','T','C','T','A','G'); my $i = 0; foreach my $base (@sequence) { $i = $i + 1; if ($i == 3) { $i = 0; next; } print "$base"; } %%perl my %geneticCode = ('TAT' => 'Tyrosine', 'TAC' => 'Tyrosine', 'CAA' => 'Glutamine', 'CAG' => 'Glutamine', 'TAG' => 'STOP', ); my @sequence = ('CAG','TAC','CAA','TAG','TAC','CAG','CAA'); foreach my $codon (@sequence) { if ($geneticCode{$codon} eq 'STOP') { last; } else { print "$geneticCode{$codon}\n"; } } %%perl my %geneticCode = ('TAT' => 'Tyrosine', 'TAC' => 'Tyrosine', 'CAA' => 'Glutamine', 'CAG' => 'Glutamine', 'TAG' => 'STOP', ); my @sequence = ('CAG','TAC','CAA','TAG','TAC','CAG','CAA'); foreach my $codon (@sequence) { last if ($geneticCode{$codon} eq 'STOP'); print "$geneticCode{$codon}\n"; } %%perl my %geneExpression = ('Beta-Catenin' => 2.5, 'Beta-Actin' => 1.7, 'Pax6' => 0, 'HoxA2' => -3.2, ); foreach my $gene (keys %geneExpression) { if ($geneExpression{$gene} < 0 or $geneExpression{$gene} > 0) { print "$gene has changed expression\n"; } else { print "$gene has not changed expression\n"; } if ($geneExpression{$gene} > 0 and $gene ne 'Beta-Actin') { print "$gene is upregulated and not a housekeeping gene\n"; } } %%perl # addition print "sum of 2 and 3 = "; my $num = 2 + 3; print "$num\n"; # subtraction print "$num minus 2 = "; $num = $num - 2; print "$num\n"; # multiplication print "product of $num and 3 = "; $num = $num * 3; print "$num\n"; # division print "$num divided by 2 = "; $num = $num / 2; print "$num\n"; # modulus print "the remainder of $num divided by 2 = "; my $remainder = $num % 2; print "$remainder\n"; # exponentiation print "$remainder raised to the power 2 = "; $num = $remainder ** 2; print "$num\n"; %%perl my $num = 1; print "Number was $num,"; $num *= 5; $num += 3; print " now $num\n"; # increment in place print "$num + 1 = "; $num++; print "$num\n"; # decrement in place print "$num - 1 = "; $num--; print "$num\n"; %%perl # absolute value print "the absolute value of 3 - 4.2 = "; my $num = abs 3 - 4.2; print "$num\n"; # conversion to integer print "int portion of $num = "; print int $num, "\n"; %%perl # absolute value print "the absolute value of 3 - 4.2 = "; my $num = abs(3 - 4.2); print "$num\n"; # conversion to integer print "int portion of $num = "; print(int($num), "\n"); %%perl my $str = "ACG" . "AGA" . "GCG"; print "$str\n"; $str .= "TGT"; print "$str\n"; %%perl # change case my $str = "ACGAGAGCGTGT"; print length $str, "\n"; print "$str\n"; $str = lc $str; print "$str\n"; print uc $str, "\n"; print "ucfirst: " , ucfirst(lc($str)),"\n"; print "lcfirst: " , lcfirst uc $str,"\n"; chop $str; print "Chopped: $str\n"; $str .= "\n"; print "Str now has a new line: $str$str$str"; chomp $str; print "Now it doesn't: $str$str$str"; %%perl my $seq = "ACG AGA GCG TGT"; my @codons = split ' ', $seq; foreach my $codon (@codons) { print "$codon\n"; } foreach my $base (split '', $seq) { next if $base eq " "; print "$base\n"; } %%perl my $str = "ACGAGAGCGTGT"; print substr($str, 3, 3),"\n"; print substr($str, 3), "\n"; print substr($str, -3), "\n"; print substr($str, 3, -3), "\n"; %%perl my @array = ('ACT','GCT','GAG','CAG'); print "Array is @array\n"; my $last = pop @array; print "Popped the last element off the array: @array. Here's the last element: $last\n"; push @array, 'CGT'; print "Added CGT to the end of the array: @array\n"; my $first = shift @array; print "Shifted the first element off the array: @array. Here's the first element: $first\n"; unshift @array, 'AAC'; print "Added AAC to the beginning of the array: @array\n"; %%perl my @array = ('ACT','GCT','GAG','CAG'); print "Array starts as @array\n"; my @spliced = splice @array, 1, 2; print "Removed @spliced to leave @array\n"; my @replacement = ('CGG','CCC','AGT','GAC'); @spliced = splice @array, 1, 1, @replacement; print "Removed @spliced and replaced with @replacement to leave @array\n"; %%perl my @bases = ('A','C','G','T','A','C','G','G','T','T','G','A'); my $seq = join '', @bases; print "@bases is now $seq\n"; $seq = join ':', @bases; print "We can use separators too: $seq\n"; %%perl my %geneExpression = ('Beta-Catenin' => 2.5, 'Beta-Actin' => 1.7, 'Pax6' => 0, 'HoxA2' => -3.2, ); my @genes = ('Pax6','EphB2','HoxA2'); for my $gene (@genes) { if (exists $geneExpression{$gene}) { print "Found $gene in hash, has expression $geneExpression{$gene}\n"; delete $geneExpression{$gene}; } else { print "Can't find $gene\n"; } } print "Hash now contains the following genes: ", join(' ', keys %geneExpression), " with the following values: ", join(' ', values %geneExpression), "\n"; %%perl my %geneticCode = ('TCT' => 'Serine', 'TCC' => 'Serine', 'CTA' => 'Leucine', 'CTG' => 'Leucine', 'TGT' => 'Cysteine', 'TAG' => 'STOP', ); my $sequence = 'TCCTGTCTACCCCTGAAGTCTTAG'; %%perl use List::Util ('max', 'min'); my @genome_mb = (100.3, 2700, 4.6, 3200, 12.1); print max(@genome_mb), "\n"; print min(@genome_mb), "\n"; %%perl my @genome_mb = (100.3, 2700); sub maximum { my ($a, $b)= @_; if ($a > $b) { return $a; } else { return $b; } } my $max = maximum(@genome_mb); print "Maximum genome size in our list is $max Mb\n"; %%perl my @genome_mb = (100.3, 2700); sub maximum2 { my $a = shift; my $b = shift; return ($a > $b) ? $a : $b; } my $max = maximum2 @genome_mb; print "Maximum genome size in our list is $max Mb\n"; %%file lesson11_example1.pl my @user_input = @ARGV; print join("\n" , @user_input) , "\n"; %%bash perl lesson11_example1.pl 23 BRCA2 0.5 %%file lesson11_example2.pl my $first_input = shift @ARGV; print "first input: $first_input\n"; %%bash perl lesson11_example2.pl one two three %%file lesson11_example3.pl my $first = shift; print "First: $first"; %%bash perl lesson11_example3.pl one two three %%file lesson11_example4.pl my $first_input = $ARGV[0]; my $second_input = $ARGV[1]; print "$first_input and $second_input\n"; %%bash perl lesson11_example4.pl one two three %%file lesson11_example5.pl my ($in_1, $in_2 , $in_3) = @ARGV; print "$in_1 and $in_2 and $in_3\n"; %%bash perl lesson11_example5.pl one two three %%file lesson12_example1.pl my $arg_1; if (defined $ARGV[0]){ $arg_1 = $ARGV[0]; print "Here's your number: $arg_1\n"; } else { die "Please supply a number\n"; } %%bash perl lesson12_example1.pl 3.14 perl lesson12_example1.pl %%file lesson12_example2.pl if (!defined $ARGV[0]) { die "Please supply a file\n"; } if (!-e $ARGV[0]) { die "$ARGV[0] does not exist. Please check the filename and/or location.\n"; } my $filename = $ARGV[0]; print "$filename is a file that exists!\n"; %%bash perl lesson12_example2.pl lesson12_example1.pl perl lesson12_example2.pl perl lesson12_example2.pl lesson12_example42.pl %%file lesson13_example1.pl my $file = shift @ARGV; open (my $fh , '<' , $file) or die "Can't open $file, $!\n"; print "opened file: $file\n"; %%bash perl lesson13_example1.pl lesson13_example1.pl %%bash cat lesson_files/lesson13_file1.txt cd lesson_files cat lesson13_file1.txt %%perl my $filename = 'lesson_files/lesson13_file1.txt'; open(my $inputfh, '<' , $filename) or die "can't open $filename\n"; while (my $line = <$inputfh>) { chomp $line; print uc $line , "\n"; } %%perl my $newfilename = 'lesson13_file2.txt'; open(my $newfh , '>' , $newfilename) or die "Can't open $newfilename\n"; print $newfh lc "Print this TO a FILE in LOWER case\n"; close $newfh; %%bash cat lesson13_file2.txt %%bash cat lesson_files/lesson13_file3.txt %%perl my $genefilename = 'lesson_files/lesson13_file3.txt'; open(my $genefh, '<' , $genefilename) or die "Can't open $genefilename\n"; while (my $line = <$genefh>) { chomp $line; my ($gene, $chr, $start, $end) = split "\t", $line; ## for csv (comma separated files) split on the comma # split ',' , $line; my $length = $end - $start + 1; print "$gene is on chromosome $chr and is $length bps long\n"; } close $genefh; %%bash cat lesson_files/lesson13_file4.txt %%bash cat lesson_files/lesson13_file5.fasta %%bash perl -lane 'print $. , ": ", length $F[0]' < lesson_files/lesson13_file4.txt %%bash perl -lane 'print "$F[0] is on chromosome $F[1]"' < lesson_files/lesson13_file3.txt %%perl my $sys = system ("date"); print "sys returned: $sys\n"; if ($sys == -1) { print "Error: $!\n"; } %%perl my $dir = `pwd`; print "I am currently in $dir\n"; %%perl my @array = ('Valine', 'Leucine','Cysteine','Tyrosine'); my @sorted_array = sort @array; foreach my $element (@sorted_array){ print "$element\n"; } my @numbers = (3,400,1,100,4,20); my @sorted_numbers = sort @numbers; foreach my $element (@sorted_numbers){ print "$element\n"; } %%perl my @numbers = (3,400,1,100,4,20); my @sorted_numbers = sort {$a <=> $b} @numbers; foreach my $element (@sorted_numbers){ print "$element\n"; } %%perl my @fragments = ( 'TCTCGAATC' , 'TTA' , 'A' , 'GCGTGATGTCGA' , 'GATC' ); my @sorted_by_length = sort { length($b) <=> length($a) } @fragments; foreach my $element( @sorted_by_length ) { print "$element\t", length ($element), "\n"; } %%perl my %nt_count = ( 'G' => 2, 'C' => 1, 'T' => 4, 'A' => 3, ); @sorted_by_values = sort {$nt_count{$a} <=> $nt_count{$b}} keys %nt_count; print join ' - ', @sorted_by_values; print "\n"; foreach my $nt( @sorted_by_values ) { print "$nt\t$nt_count{$nt}\n"; } %%perl my $dna = 'ACTTGATAG'; if ($dna =~ /TAG/) { print "$dna contains a stop codon\n"; } else { print "No stop codons found"; } %%perl my $dna = "ACAGGAGGATCTACTGCAGGCCAGCGCGAAGAGACTCATATAGGCAGACGAGAACGTAG"; if ($dna =~ /CTGCAG/) { print "Found PstI recognition site\n"; } if ($dna =~ /GA.TC/) { print "Found HinfI recognition site\n"; } if ($dna =~ /[AG]GATC[TC]/) { print "Found XhoII recognition site\n"; } if ($dna =~ /TAG$/) { print "Found STOP codon at the end of the sequence\n"; } %%perl foreach my $genename ("BRCA1","BRCA11","BRCA 1","BRCA1 gene") { if ($genename =~ /^\w+\d$/) { print "$genename contains one or more word characters, one digit, and nothing else\n"; } else { print "$genename contains something other than one or more word characters, one digit and nothing else\n"; } if ($genename =~ /\w+\s*\d+/) { print "$genename contains one or more word characters, optional whitespace and one or more digits\n"; } else { print "$genename contains something other than one or more word characters, optional whitespace and one or more digits\n"; } } %%perl my $dna = "ACGTGTAG"; if ($dna =~ /(TAG|TAA|TGA)/) { print "Found this stop codon: $1\n"; } %%perl my $dna = 'caccATGAGACAGAACAGTAGgggacagttgcacATGCCAGGACGACCCATATAATAGaca'; $dna = uc $dna; $dna =~ /(ATG[ACGT]+(TAG|TAA|TGA))/; print "$1\n"; %%perl my $dna = 'caccATGAGACAGAACAGTAGgggacagttgcacATGCCAGGACGACCCATATAATAGaca'; $dna = uc $dna; $dna =~ /(ATG[ACGT]+?(TAG|TAA|TGA))/; print "$1\n"; %%perl my $dna = 'caccATGAGACAGAACAGTAGgggacagttgcacATGCCAGGACGACCCATATAATAGaca'; my $dna = uc $dna; while ($dna =~ /(ATG[ACGT]+?(TAG|TAA|TGA))/g) { print "$1\n"; } %%perl my $dna = "CGAGGAGGATCTACTGCAGGCCAGCGCGAAGAGACTCATATAGGCAGACGAGAACGTCG"; print "$dna\n"; $dna =~ s/CG/TG/; print "$dna\n"; # Whoops! only mutates the first CG $dna =~ s/CG/TG/g; print "$dna\n"; %%perl my $psti = 'CTGCAG'; my $dna = "CGAGGACTGCAGGGATCTACTGCAGGCCAGCGCGAAGAGACTCATATACTGCAGCGTCG"; my @psti_frags = split /$psti/, $dna; print "Original sequence\n$dna\n"; print "PstI($psti) fragments:\n", join("\n", @psti_frags), "\n" %%perl my @codons = ('ACG','GAG','GCG','CCC','AGT','GAA'); my $codon_ref = \@codons; print "$codon_ref\n"; my %geneticCode = ('TCT' => 'Serine', 'TCC' => 'Serine', 'TAG' => 'STOP', ); my $geneticCode_ref = \%geneticCode; print "$geneticCode_ref\n"; %%perl use Data::Dumper; my @serine = ('TCA','TCC','TCG','TCT'); my @proline =('CCA','CCC','CCG','CCT'); my %aas; $aas{'serine'} = \@serine; $aas{'proline'} = \@proline; print Dumper \%aas; pop @serine; shift @proline; print "After array changes, the hash also changes\n"; print Dumper \%aas; %%perl -w use Data::Dumper; my %aas = ('serine' => ['TCA','TCC','TCG','TCT'], 'proline' => ['CCA','CCC','CCG','CCT'], ); my %codes; $codes{'earth'} = \%aas; $codes{'mars'} = {'serine' => ['QWZ','QWX','QWQ','QWW'], 'proline' => ['ZXZ','ZXX','ZXQ','ZXW'], }; print Dumper \%codes; print "$codes{'earth'}{'serine'}[0]\n"; print "$codes{'mars'}{'proline'}[1]\n"; %%perl my $mars_ref = {'serine' => ['QWZ','QWX','QWQ','QWW'], 'proline' => ['ZXZ','ZXX','ZXQ','ZXW'], }; my @aas = keys %{$mars_ref}; print "@aas\n"; %%perl use Data::Dumper; sub means { my @means; for my $array_ref (@_) { my $length = @{$array_ref}; my $total = 0; for my $num (@{$array_ref}) { $total += $num; } push @means, $total / $length; } return @means; } my @mean_out = means([1,2,3],[4,5,6],[7,8,9]); print Dumper \@mean_out;