Difference between revisions of "BuKyung Align two protein sequences using a dynamic programming method in Perl"
imported>Baik BuKyung (Created page with "<p>Back to Baik BuKyung</p> <hr /> <p> </p>") |
imported>Baik BuKyung |
||
(3 intermediate revisions by the same user not shown) | |||
Line 2: | Line 2: | ||
<hr /> | <hr /> | ||
+ | <p><span style="font-size:24px">Source code:</span></p> | ||
+ | |||
+ | <div> | ||
+ | <div>#!/usr/bin/perl<br /> | ||
+ | use strict;<br /> | ||
+ | use warnings;</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> my @sequence;<br /> | ||
+ | my $numberofseq=0;<br /> | ||
+ | while(<>){<br /> | ||
+ | if($_=~ />/){<br /> | ||
+ | $sequence[$numberofseq]{seqname}=$_;<br /> | ||
+ | $sequence[$numberofseq]{seqname}=~ s/\n//;<br /> | ||
+ | <br /> | ||
+ | }<br /> | ||
+ | else{<br /> | ||
+ | $sequence[$numberofseq]{seq}=$_;<br /> | ||
+ | $sequence[$numberofseq]{seq}=~ s/\n//;<br /> | ||
+ | $numberofseq++;<br /> | ||
+ | }<br /> | ||
+ | }</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div>my $seq_1=$sequence[0]{seq};<br /> | ||
+ | my $seq_2=$sequence[1]{seq};</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div>my $match = 1;<br /> | ||
+ | my $mismatch = -1;<br /> | ||
+ | my $indel = -1;</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div>my @matrix;<br /> | ||
+ | $matrix[0][0]{score} = 0;<br /> | ||
+ | $matrix[0][0]{direction} = "dead_end";</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div>for(my $j = 1; $j <= length($seq_1); $j++) {<br /> | ||
+ | $matrix[0][$j]{score} = $indel * $j;<br /> | ||
+ | $matrix[0][$j]{direction} = "left";<br /> | ||
+ | }<br /> | ||
+ | for (my $i = 1; $i <= length($seq_2); $i++) {<br /> | ||
+ | $matrix[$i][0]{score} = $indel * $i;<br /> | ||
+ | $matrix[$i][0]{direction} = "top";<br /> | ||
+ | }</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div><br /> | ||
+ | for(my $i = 1; $i <= length($seq_2); $i++) {<br /> | ||
+ | for(my $j = 1; $j <= length($seq_1); $j++) {<br /> | ||
+ | my ($diagonal, $left, $top);</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> # calculate match score<br /> | ||
+ | my $char1 = substr($seq_1, $j-1, 1);<br /> | ||
+ | my $char2 = substr($seq_2, $i-1, 1); <br /> | ||
+ | if ($char1 eq $char2) {<br /> | ||
+ | $diagonal = $matrix[$i-1][$j-1]{score} + $match;<br /> | ||
+ | }<br /> | ||
+ | else {<br /> | ||
+ | $diagonal = $matrix[$i-1][$j-1]{score} + $mismatch;<br /> | ||
+ | }</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> # calculate gap scores<br /> | ||
+ | $top = $matrix[$i-1][$j]{score} + $indel;<br /> | ||
+ | $left = $matrix[$i][$j-1]{score} + $indel;</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> # choose best score<br /> | ||
+ | if ($diagonal >= $top) {<br /> | ||
+ | if ($diagonal >= $left) {<br /> | ||
+ | $matrix[$i][$j]{score} = $diagonal;<br /> | ||
+ | $matrix[$i][$j]{direction} = "diagonal";<br /> | ||
+ | }<br /> | ||
+ | else {<br /> | ||
+ | $matrix[$i][$j]{score} = $left;<br /> | ||
+ | $matrix[$i][$j]{direction} = "left";<br /> | ||
+ | }<br /> | ||
+ | } else {<br /> | ||
+ | if ($top >= $left) {<br /> | ||
+ | $matrix[$i][$j]{score} = $top;<br /> | ||
+ | $matrix[$i][$j]{direction} = "top";<br /> | ||
+ | }<br /> | ||
+ | else {<br /> | ||
+ | $matrix[$i][$j]{score} = $left;<br /> | ||
+ | $matrix[$i][$j]{direction} = "left";<br /> | ||
+ | }<br /> | ||
+ | }<br /> | ||
+ | }<br /> | ||
+ | }</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div>my $seq1_align = "";<br /> | ||
+ | my $seq2_align = "";</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div>my $i = length($seq_2);<br /> | ||
+ | my $j = length($seq_1);</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div><br /> | ||
+ | while (!($matrix[$i][$j]{direction} eq "dead_end")) {</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div> if ($matrix[$i][$j]{direction} eq "diagonal") {<br /> | ||
+ | $seq1_align = $seq1_align . substr($seq_1, $j-1, 1);<br /> | ||
+ | $seq2_align = $seq2_align . substr($seq_2, $i-1, 1);<br /> | ||
+ | $i--;<br /> | ||
+ | $j--;<br /> | ||
+ | }<br /> | ||
+ | elsif ($matrix[$i][$j]{direction} eq "left") {<br /> | ||
+ | $seq1_align = $seq1_align . substr($seq_1, $j-1, 1);<br /> | ||
+ | $seq2_align = $seq2_align . "-";<br /> | ||
+ | $j--;<br /> | ||
+ | }<br /> | ||
+ | elsif ($matrix[$i][$j]{direction} eq "top") {<br /> | ||
+ | $seq1_align = $seq1_align . "-";<br /> | ||
+ | $seq2_align = $seq2_align . substr($seq_2, $i-1, 1);<br /> | ||
+ | $i--;<br /> | ||
+ | } <br /> | ||
+ | }</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div>$seq1_align = reverse $seq1_align;<br /> | ||
+ | $seq2_align = reverse $seq2_align;</div> | ||
+ | |||
+ | <div> </div> | ||
+ | |||
+ | <div>print ("\n",$seq1_align,"\n", $seq2_align); | ||
+ | <hr /> | ||
+ | <p><span style="font-size:24px">Algorithm</span></p> | ||
+ | |||
+ | <p><span style="font-size:16px">I used Needleman-Wunsch Algorithm.</span></p> | ||
+ | |||
+ | <p><img alt="" src="/ckfinder/userfiles/images/Needleman-Wunsch_pairwise_sequence_alignment.png" style="height:480px; width:480px" /></p> | ||
+ | |||
+ | <p><span style="font-size:16px">I used Needleman-Wunsch Algorithm, I assigned the score with match, mismatch and gap.</span></p> | ||
+ | |||
+ | <p><span style="font-size:16px">Picture reference: </span>https://commons.wikimedia.org/wiki/File:Needleman-Wunsch_pairwise_sequence_alignment.png#/media/File:Needleman-Wunsch_pairwise_sequence_alignment.png</p> | ||
+ | |||
+ | <p>Algorithm explanation: https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm</p> | ||
+ | |||
<p> </p> | <p> </p> | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <hr /> | ||
+ | <div> | ||
+ | <p><span style="font-size:24px">Result</span></p> | ||
+ | |||
+ | <p><img alt="" src="/ckfinder/userfiles/images/%EC%BA%A1%EC%B2%9822(1).PNG" style="height:21px; width:317px" /></p> | ||
+ | |||
+ | <p><span style="font-size:16px">After the 9.pl is executed.</span></p> | ||
+ | |||
+ | <p><span style="font-size:16px">The original content in the outer.fasta file is</span></p> | ||
+ | |||
+ | <p><em>>0<br /> | ||
+ | FNFSSDMRRVKRERMLKMVSKPLKRRCYHLSGCVALCAIPEELDRICKGKIPHDDHCRPVARFHSVECYCGCNQNQQYDAGGVNMHRSLMQAEGHREPNH<br /> | ||
+ | >1<br /> | ||
+ | LADIDFNVHCQEWDENRAGEWVHQDPKVNWRFILCIRLFGMEGFKNQAHLDMVEKLYIYPLPNAWKWYKALCHYAKHAMCLQDESYECKYQRPWIMDKVC<br /> | ||
+ | >2<br /> | ||
+ | QWCRYLRMQREGQRFGVYRRVCIVICSVFFEWADQMYMSHRCVKAWRWWECQRFDIVIRSPRLNRLFCNHEGAHAAKWVWPCLMHRLLDFNCLQNSMRCR<br /> | ||
+ | >3<br /> | ||
+ | PKCAPQRAHLPYGARHHWGDSLHMGEHAVSCQRKWDMFHGIRHFPKSDAGMQWFNEKERPKKWCAVFKYELFVPLMRPHAQGQYSRELREVAAHRLCSCP<br /> | ||
+ | >4<br /> | ||
+ | RVQDLFNEGEIAEVWCNQYWESFSKARVGCMGISYPRSMFWMCFRPGFCYNQHEQIVQPALLLQVNGYRIQNDEMNEVRIMDPPIRYDMWPCVPLKDDLS</em></p> | ||
+ | |||
+ | <p> </p> | ||
+ | |||
+ | <p>This file is generated by the [[BuKyung Randomly generate five 100 AA long protein sequences and store them in a FASTA file]] assignment.</p> | ||
+ | |||
+ | <p>I used the 0 and 1 sequence for the alignment.</p> | ||
+ | |||
+ | <p> </p> | ||
+ | |||
+ | <p><span style="font-size:16px">The printed result is</span></p> | ||
+ | |||
+ | <p><img alt="" src="/ckfinder/userfiles/images/%EC%BA%A1%EC%B2%9823.PNG" style="height:76px; width:992px" /></p> | ||
+ | |||
+ | <p> </p> | ||
+ | |||
+ | <div> </div> | ||
+ | </div> |
Latest revision as of 05:20, 17 June 2016
Back to Baik BuKyung
Source code:
use strict;
my $numberofseq=0;
while(<>){
if($_=~ />/){
$sequence[$numberofseq]{seqname}=$_;
$sequence[$numberofseq]{seqname}=~ s/\n//;
}
else{
$sequence[$numberofseq]{seq}=$_;
$sequence[$numberofseq]{seq}=~ s/\n//;
$numberofseq++;
}
my $seq_2=$sequence[1]{seq};
my $mismatch = -1;
$matrix[0][0]{score} = 0;
$matrix[0][$j]{score} = $indel * $j;
$matrix[0][$j]{direction} = "left";
}
for (my $i = 1; $i <= length($seq_2); $i++) {
$matrix[$i][0]{score} = $indel * $i;
$matrix[$i][0]{direction} = "top";
for(my $i = 1; $i <= length($seq_2); $i++) {
for(my $j = 1; $j <= length($seq_1); $j++) {
my $char1 = substr($seq_1, $j-1, 1);
my $char2 = substr($seq_2, $i-1, 1);
if ($char1 eq $char2) {
$diagonal = $matrix[$i-1][$j-1]{score} + $match;
}
else {
$diagonal = $matrix[$i-1][$j-1]{score} + $mismatch;
$top = $matrix[$i-1][$j]{score} + $indel;
if ($diagonal >= $top) {
if ($diagonal >= $left) {
$matrix[$i][$j]{score} = $diagonal;
$matrix[$i][$j]{direction} = "diagonal";
}
else {
$matrix[$i][$j]{score} = $left;
$matrix[$i][$j]{direction} = "left";
}
} else {
if ($top >= $left) {
$matrix[$i][$j]{score} = $top;
$matrix[$i][$j]{direction} = "top";
}
else {
$matrix[$i][$j]{score} = $left;
$matrix[$i][$j]{direction} = "left";
}
}
}
my $seq2_align = "";
my $j = length($seq_1);
while (!($matrix[$i][$j]{direction} eq "dead_end")) {
$seq1_align = $seq1_align . substr($seq_1, $j-1, 1);
$seq2_align = $seq2_align . substr($seq_2, $i-1, 1);
$i--;
$j--;
}
elsif ($matrix[$i][$j]{direction} eq "left") {
$seq1_align = $seq1_align . substr($seq_1, $j-1, 1);
$seq2_align = $seq2_align . "-";
$j--;
}
elsif ($matrix[$i][$j]{direction} eq "top") {
$seq1_align = $seq1_align . "-";
$seq2_align = $seq2_align . substr($seq_2, $i-1, 1);
$i--;
}
$seq2_align = reverse $seq2_align;
Algorithm
I used Needleman-Wunsch Algorithm.
I used Needleman-Wunsch Algorithm, I assigned the score with match, mismatch and gap.
Picture reference: https://commons.wikimedia.org/wiki/File:Needleman-Wunsch_pairwise_sequence_alignment.png#/media/File:Needleman-Wunsch_pairwise_sequence_alignment.png
Algorithm explanation: https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
Result
After the 9.pl is executed.
The original content in the outer.fasta file is
>0
FNFSSDMRRVKRERMLKMVSKPLKRRCYHLSGCVALCAIPEELDRICKGKIPHDDHCRPVARFHSVECYCGCNQNQQYDAGGVNMHRSLMQAEGHREPNH
>1
LADIDFNVHCQEWDENRAGEWVHQDPKVNWRFILCIRLFGMEGFKNQAHLDMVEKLYIYPLPNAWKWYKALCHYAKHAMCLQDESYECKYQRPWIMDKVC
>2
QWCRYLRMQREGQRFGVYRRVCIVICSVFFEWADQMYMSHRCVKAWRWWECQRFDIVIRSPRLNRLFCNHEGAHAAKWVWPCLMHRLLDFNCLQNSMRCR
>3
PKCAPQRAHLPYGARHHWGDSLHMGEHAVSCQRKWDMFHGIRHFPKSDAGMQWFNEKERPKKWCAVFKYELFVPLMRPHAQGQYSRELREVAAHRLCSCP
>4
RVQDLFNEGEIAEVWCNQYWESFSKARVGCMGISYPRSMFWMCFRPGFCYNQHEQIVQPALLLQVNGYRIQNDEMNEVRIMDPPIRYDMWPCVPLKDDLS
This file is generated by the BuKyung Randomly generate five 100 AA long protein sequences and store them in a FASTA file assignment.
I used the 0 and 1 sequence for the alignment.
The printed result is