6. In a multi-sequence FASTA file, produce statistics such as sequence number, average seq length, GC content, AT content, etc MJ

From Biolecture.org

Idea

I open three different FASTA files which are TERT, VEGFA, and Propreinsulin.

Then I calculate the sequence length of each file, the average sequence length across the three files, the GC content, the AT content, and the percentages of GC and AT, using Perl's 'tr' operator to count the bases.

Code

#!/usr/bin/perl
use strict;
use warnings;

# Report basic statistics for three FASTA files (TERT, VEGFA and
# Propreinsulin): the sequence itself, its length, the average length over
# all files, the A/C/G/T counts, the GC and AT totals and their percentages.
#
# Each file is expected to hold one nucleotide sequence: a header line
# followed by one or more sequence lines.

# [ display label, input file ] pairs, in the order they are reported.
my @inputs = (
    [ 'TERT',          'TERT.txt' ],
    [ 'VEGFA',         'VEGFA.txt' ],
    [ 'Propreinsulin', 'Propreinsulin.txt' ],
);

# read_sequence($path)
#
# Reads the file at $path and returns its sequence as a single string.
# The first line is always discarded as the FASTA header. Any later line
# starting with '>' is skipped too, so header text is never counted as
# sequence. All whitespace (including "\r" from CRLF files) is removed.
# Dies with the system error message if the file cannot be opened.
sub read_sequence {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Cannot open '$path': $!\n";
    my @lines = <$fh>;
    close($fh);

    shift @lines;    # drop the header line

    my $sequence = '';
    for my $line (@lines) {
        next if $line =~ /^>/;
        $line =~ s/\s+//g;
        $sequence .= $line;
    }
    return $sequence;
}

# percent_of($part, $total)
#
# Returns $part as a percentage of $total, or 0 when $total is 0 (for
# example an empty sequence) so the report never divides by zero.
sub percent_of {
    my ($part, $total) = @_;
    return 0 if !$total;
    return $part / $total * 100;
}

# print_base_stats($index, $sequence)
#
# Prints the per-sequence report: the count of each base, the GC and AT
# totals, and their percentages relative to A+C+G+T. $index is the 1-based
# number shown in the "<proteinN>" heading.
sub print_base_stats {
    my ($index, $sequence) = @_;

    # tr/// with an empty replacement list counts matching characters
    # without modifying the string. Both cases are counted so that
    # lower-case (soft-masked) sequences are handled as well.
    my $a_count = ($sequence =~ tr/Aa//);
    my $c_count = ($sequence =~ tr/Cc//);
    my $g_count = ($sequence =~ tr/Gg//);
    my $t_count = ($sequence =~ tr/Tt//);
    my $total   = $a_count + $c_count + $g_count + $t_count;

    print "<protein", $index, ">\n";
    print "The number of A is $a_count.\n";
    print "The number of C is $c_count.\n";
    print "The number of G is $g_count.\n";
    print "The number of T is $t_count.\n";

    # GC content
    my $gc = $g_count + $c_count;
    print "Total GC content : $gc.\n";

    # AT content
    my $at = $a_count + $t_count;
    print "Total AT content : $at.\n";

    # Percentage of GC and AT content
    my $gc_percent = percent_of($gc, $total);
    print "The percentage of GC: $gc_percent%.\n";
    my $at_percent = percent_of($at, $total);
    print "The percentage of AT: $at_percent%.\n";
    print "\n";

    return;
}

# Load every sequence up front so the report sections below can be printed
# in the same order as before: sequences, lengths, then base statistics.
my @records;
for my $input (@inputs) {
    my ($label, $path) = @{$input};
    push @records, { label => $label, sequence => read_sequence($path) };
}

for my $record (@records) {
    print "$record->{label} : ", $record->{sequence}, "\n";
}

print "\n";

my $total_length = 0;
for my $record (@records) {
    my $length = length $record->{sequence};
    $total_length += $length;
    print "The length of $record->{label} sequence : $length.\n";
}

my $average = $total_length / scalar @records;
print "The average length of multi-sequences : $average.\n";

print "\n";

for my $index (1 .. scalar @records) {
    print_base_stats($index, $records[ $index - 1 ]{sequence});
}

exit;

Result

[[1]]