Difference between revisions of "BuKyung Align two protein sequences using a dynamic programming method in Perl"

From Biolecture.org
imported>Baik BuKyung
(Created page with "<p>Back to Baik BuKyung</p> <hr /> <p> </p>")
 
imported>Baik BuKyung
 
(3 intermediate revisions by the same user not shown)
Line 2: Line 2:
  
 
<hr />
 
<hr />
 +
<p><span style="font-size:24px">Source code:</span></p>
 +
 +
<div>
 +
<div>#!/usr/bin/perl<br />
 +
&nbsp;use strict;<br />
 +
&nbsp;use warnings;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;my @sequence;<br />
 +
&nbsp;my $numberofseq=0;<br />
 +
&nbsp;while(&lt;&gt;){<br />
 +
&nbsp;if($_=~ /&gt;/){<br />
 +
&nbsp;&nbsp;$sequence[$numberofseq]{seqname}=$_;<br />
 +
&nbsp;&nbsp;$sequence[$numberofseq]{seqname}=~ s/\n//;<br />
 +
&nbsp;&nbsp;<br />
 +
}<br />
 +
&nbsp;else{<br />
 +
&nbsp;&nbsp;$sequence[$numberofseq]{seq}=$_;<br />
 +
&nbsp;&nbsp;$sequence[$numberofseq]{seq}=~ s/\n//;<br />
 +
&nbsp;&nbsp;$numberofseq++;<br />
 +
}<br />
 +
}</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>my $seq_1=$sequence[0]{seq};<br />
 +
my $seq_2=$sequence[1]{seq};</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>my $match =&nbsp; 1;<br />
 +
my $mismatch = -1;<br />
 +
my $indel = -1;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>my @matrix;<br />
 +
$matrix[0][0]{score}&nbsp;&nbsp; = 0;<br />
 +
$matrix[0][0]{direction} = &quot;dead_end&quot;;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>for(my $j = 1; $j &lt;= length($seq_1); $j++) {<br />
 +
&nbsp;&nbsp;&nbsp; $matrix[0][$j]{score} = $indel * $j;<br />
 +
&nbsp;&nbsp;&nbsp; $matrix[0][$j]{direction} = &quot;left&quot;;<br />
 +
}<br />
 +
for (my $i = 1; $i &lt;= length($seq_2); $i++) {<br />
 +
&nbsp;&nbsp;&nbsp; $matrix[$i][0]{score}&nbsp;&nbsp; = $indel * $i;<br />
 +
&nbsp;&nbsp;&nbsp; $matrix[$i][0]{direction} = &quot;top&quot;;<br />
 +
}</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div><br />
 +
for(my $i = 1; $i &lt;= length($seq_2); $i++) {<br />
 +
&nbsp;&nbsp;&nbsp; for(my $j = 1; $j &lt;= length($seq_1); $j++) {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; my ($diagonal, $left, $top);</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # calculate match score<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; my $char1 = substr($seq_1, $j-1, 1);<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; my $char2 = substr($seq_2, $i-1, 1);&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if ($char1 eq $char2) {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $diagonal = $matrix[$i-1][$j-1]{score} + $match;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; else {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $diagonal = $matrix[$i-1][$j-1]{score} + $mismatch;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # calculate gap scores<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $top = $matrix[$i-1][$j]{score} + $indel;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $left = $matrix[$i][$j-1]{score} + $indel;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # choose best score<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if ($diagonal &gt;= $top) {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if ($diagonal &gt;= $left) {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $matrix[$i][$j]{score} = $diagonal;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $matrix[$i][$j]{direction} = &quot;diagonal&quot;;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; else {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $matrix[$i][$j]{score} = $left;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $matrix[$i][$j]{direction} = &quot;left&quot;;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; } else {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if ($top &gt;= $left) {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $matrix[$i][$j]{score}&nbsp;&nbsp; = $top;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $matrix[$i][$j]{direction} = &quot;top&quot;;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; else {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $matrix[$i][$j]{score}&nbsp;&nbsp; = $left;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $matrix[$i][$j]{direction} = &quot;left&quot;;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }<br />
 +
&nbsp;&nbsp;&nbsp; }<br />
 +
}</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>my $seq1_align = &quot;&quot;;<br />
 +
my $seq2_align = &quot;&quot;;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>my $i = length($seq_2);<br />
 +
my $j = length($seq_1);</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div><br />
 +
while (!($matrix[$i][$j]{direction} eq &quot;dead_end&quot;)) {</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>&nbsp;&nbsp;&nbsp; if ($matrix[$i][$j]{direction} eq &quot;diagonal&quot;) {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $seq1_align = $seq1_align . substr($seq_1, $j-1, 1);<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $seq2_align = $seq2_align . substr($seq_2, $i-1, 1);<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $i--;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $j--;<br />
 +
&nbsp;&nbsp;&nbsp; }<br />
 +
&nbsp;&nbsp;&nbsp; elsif ($matrix[$i][$j]{direction} eq &quot;left&quot;) {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $seq1_align = $seq1_align . substr($seq_1, $j-1, 1);<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $seq2_align = $seq2_align . &quot;-&quot;;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $j--;<br />
 +
&nbsp;&nbsp;&nbsp; }<br />
 +
&nbsp;&nbsp;&nbsp; elsif ($matrix[$i][$j]{direction} eq &quot;top&quot;) {<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $seq1_align = $seq1_align . &quot;-&quot;;<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $seq2_align = $seq2_align . substr($seq_2, $i-1, 1);<br />
 +
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; $i--;<br />
 +
&nbsp;&nbsp;&nbsp; }&nbsp;&nbsp;&nbsp;<br />
 +
}</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>$seq1_align = reverse $seq1_align;<br />
 +
$seq2_align = reverse $seq2_align;</div>
 +
 +
<div>&nbsp;</div>
 +
 +
<div>print (&quot;\n&quot;,$seq1_align,&quot;\n&quot;, $seq2_align);
 +
<hr />
 +
<p><span style="font-size:24px">Algorithm</span></p>
 +
 +
<p><span style="font-size:16px">I used Needleman-Wunsch Algorithm.</span></p>
 +
 +
<p><img alt="" src="/ckfinder/userfiles/images/Needleman-Wunsch_pairwise_sequence_alignment.png" style="height:480px; width:480px" /></p>
 +
 +
<p><span style="font-size:16px">I used Needleman-Wunsch Algorithm, I assigned the score with match, mismatch and gap.</span></p>
 +
 +
<p><span style="font-size:16px">Picture reference: </span>https://commons.wikimedia.org/wiki/File:Needleman-Wunsch_pairwise_sequence_alignment.png#/media/File:Needleman-Wunsch_pairwise_sequence_alignment.png</p>
 +
 +
<p>Algorithm explanation: https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm</p>
 +
 
<p>&nbsp;</p>
 
<p>&nbsp;</p>
 +
</div>
 +
</div>
 +
 +
<hr />
 +
<div>
 +
<p><span style="font-size:24px">Result</span></p>
 +
 +
<p><img alt="" src="/ckfinder/userfiles/images/%EC%BA%A1%EC%B2%9822(1).PNG" style="height:21px; width:317px" /></p>
 +
 +
<p><span style="font-size:16px">After the 9.pl is executed.</span></p>
 +
 +
<p><span style="font-size:16px">The original content in the outer.fasta file is</span></p>
 +
 +
<p><em>&gt;0<br />
 +
FNFSSDMRRVKRERMLKMVSKPLKRRCYHLSGCVALCAIPEELDRICKGKIPHDDHCRPVARFHSVECYCGCNQNQQYDAGGVNMHRSLMQAEGHREPNH<br />
 +
&gt;1<br />
 +
LADIDFNVHCQEWDENRAGEWVHQDPKVNWRFILCIRLFGMEGFKNQAHLDMVEKLYIYPLPNAWKWYKALCHYAKHAMCLQDESYECKYQRPWIMDKVC<br />
 +
&gt;2<br />
 +
QWCRYLRMQREGQRFGVYRRVCIVICSVFFEWADQMYMSHRCVKAWRWWECQRFDIVIRSPRLNRLFCNHEGAHAAKWVWPCLMHRLLDFNCLQNSMRCR<br />
 +
&gt;3<br />
 +
PKCAPQRAHLPYGARHHWGDSLHMGEHAVSCQRKWDMFHGIRHFPKSDAGMQWFNEKERPKKWCAVFKYELFVPLMRPHAQGQYSRELREVAAHRLCSCP<br />
 +
&gt;4<br />
 +
RVQDLFNEGEIAEVWCNQYWESFSKARVGCMGISYPRSMFWMCFRPGFCYNQHEQIVQPALLLQVNGYRIQNDEMNEVRIMDPPIRYDMWPCVPLKDDLS</em></p>
 +
 +
<p>&nbsp;</p>
 +
 +
<p>This file is generated by the [[BuKyung Randomly generate five 100 AA long protein sequences and store them in a FASTA file]] assignment.</p>
 +
 +
<p>I used the 0 and 1 sequence for the alignment.</p>
 +
 +
<p>&nbsp;</p>
 +
 +
<p><span style="font-size:16px">The printed&nbsp;result is</span></p>
 +
 +
<p><img alt="" src="/ckfinder/userfiles/images/%EC%BA%A1%EC%B2%9823.PNG" style="height:76px; width:992px" /></p>
 +
 +
<p>&nbsp;</p>
 +
 +
<div>&nbsp;</div>
 +
</div>

Latest revision as of 05:20, 17 June 2016

Back to Baik BuKyung


Source code:

#!/usr/bin/perl

 use strict;

 use warnings;
 
 my @sequence;

 my $numberofseq=0;
 while(<>){
 if($_=~ />/){
  $sequence[$numberofseq]{seqname}=$_;
  $sequence[$numberofseq]{seqname}=~ s/\n//;
  
}
 else{
  $sequence[$numberofseq]{seq}=$_;
  $sequence[$numberofseq]{seq}=~ s/\n//;
  $numberofseq++;
}

}
 
my $seq_1=$sequence[0]{seq};
my $seq_2=$sequence[1]{seq};
 
 
 
my $match =  1;

my $mismatch = -1;

my $indel = -1;
 
my @matrix;

$matrix[0][0]{score}   = 0;

$matrix[0][0]{direction} = "dead_end";
 
for(my $j = 1; $j <= length($seq_1); $j++) {

    $matrix[0][$j]{score} = $indel * $j;
    $matrix[0][$j]{direction} = "left";
}
for (my $i = 1; $i <= length($seq_2); $i++) {
    $matrix[$i][0]{score}   = $indel * $i;
    $matrix[$i][0]{direction} = "top";

}
 

for(my $i = 1; $i <= length($seq_2); $i++) {
    for(my $j = 1; $j <= length($seq_1); $j++) {

        my ($diagonal, $left, $top);
 
        # calculate match score

        my $char1 = substr($seq_1, $j-1, 1);
        my $char2 = substr($seq_2, $i-1, 1);                           
        if ($char1 eq $char2) {
            $diagonal = $matrix[$i-1][$j-1]{score} + $match;
        }
        else {
            $diagonal = $matrix[$i-1][$j-1]{score} + $mismatch;

        }
 
        # calculate gap scores

        $top = $matrix[$i-1][$j]{score} + $indel;

        $left = $matrix[$i][$j-1]{score} + $indel;
 
        # choose best score

        if ($diagonal >= $top) {
            if ($diagonal >= $left) {
                $matrix[$i][$j]{score} = $diagonal;
                $matrix[$i][$j]{direction} = "diagonal";
            }
        else {
                $matrix[$i][$j]{score} = $left;
                $matrix[$i][$j]{direction} = "left";
            }
        } else {
            if ($top >= $left) {
                $matrix[$i][$j]{score}   = $top;
                $matrix[$i][$j]{direction} = "top";
            }
            else {
                $matrix[$i][$j]{score}   = $left;
                $matrix[$i][$j]{direction} = "left";
            }
        }
    }

}
 
 
 
my $seq1_align = "";
my $seq2_align = "";
 
my $i = length($seq_2);
my $j = length($seq_1);
 

while (!($matrix[$i][$j]{direction} eq "dead_end")) {
 
    if ($matrix[$i][$j]{direction} eq "diagonal") {

        $seq1_align = $seq1_align . substr($seq_1, $j-1, 1);
        $seq2_align = $seq2_align . substr($seq_2, $i-1, 1);
        $i--;
        $j--;
    }
    elsif ($matrix[$i][$j]{direction} eq "left") {
        $seq1_align = $seq1_align . substr($seq_1, $j-1, 1);
        $seq2_align = $seq2_align . "-";
        $j--;
    }
    elsif ($matrix[$i][$j]{direction} eq "top") {
        $seq1_align = $seq1_align . "-";
        $seq2_align = $seq2_align . substr($seq_2, $i-1, 1);
        $i--;
    }   

}
 
$seq1_align = reverse $seq1_align;
$seq2_align = reverse $seq2_align;
 
print ("\n",$seq1_align,"\n", $seq2_align);

Algorithm

I used Needleman-Wunsch Algorithm.

I used Needleman-Wunsch Algorithm, I assigned the score with match, mismatch and gap.

Picture reference: https://commons.wikimedia.org/wiki/File:Needleman-Wunsch_pairwise_sequence_alignment.png#/media/File:Needleman-Wunsch_pairwise_sequence_alignment.png

Algorithm explanation: https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm

 


Result

After the 9.pl is executed.

The original content in the outer.fasta file is

>0
FNFSSDMRRVKRERMLKMVSKPLKRRCYHLSGCVALCAIPEELDRICKGKIPHDDHCRPVARFHSVECYCGCNQNQQYDAGGVNMHRSLMQAEGHREPNH
>1
LADIDFNVHCQEWDENRAGEWVHQDPKVNWRFILCIRLFGMEGFKNQAHLDMVEKLYIYPLPNAWKWYKALCHYAKHAMCLQDESYECKYQRPWIMDKVC
>2
QWCRYLRMQREGQRFGVYRRVCIVICSVFFEWADQMYMSHRCVKAWRWWECQRFDIVIRSPRLNRLFCNHEGAHAAKWVWPCLMHRLLDFNCLQNSMRCR
>3
PKCAPQRAHLPYGARHHWGDSLHMGEHAVSCQRKWDMFHGIRHFPKSDAGMQWFNEKERPKKWCAVFKYELFVPLMRPHAQGQYSRELREVAAHRLCSCP
>4
RVQDLFNEGEIAEVWCNQYWESFSKARVGCMGISYPRSMFWMCFRPGFCYNQHEQIVQPALLLQVNGYRIQNDEMNEVRIMDPPIRYDMWPCVPLKDDLS

 

This file is generated by the BuKyung Randomly generate five 100 AA long protein sequences and store them in a FASTA file assignment.

I used the 0 and 1 sequence for the alignment.

 

The printed result is