Changes

← Older edit

Homework 6. In a multi-sequence FASTA file, produce statistics such as sequence number, average seq length, GC content, AT content, etc

8,616 bytes added, 19:29, 27 May 2017

no edit summary

It is Myeongji's idea.

#!/usr/bin/perl

use strict;

use warnings;

# Read one sequence file and return its sequence lines.
#
# Parameters:
#   $path - name of the file to read.
# Returns:
#   A list holding every line after the first one, with trailing line-ending
#   characters removed.  The first line is discarded (presumably the FASTA
#   header line -- confirm against the input files).
# Dies:
#   When the file cannot be opened or closed, so that a missing input is
#   reported instead of silently producing empty sequences.
sub read_sequence_lines {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle is private to this call.
    open my $fh, '<', $path or die "Cannot open '$path': $!";
    my @lines = <$fh>;
    close $fh or die "Cannot close '$path': $!";

    # Remove LF and CRLF line endings alike; a leftover "\r" would otherwise
    # be counted as part of the sequence length.
    s/[\r\n]+\z// for @lines;

    # Drop the first line; an empty file simply yields no sequence lines.
    shift @lines;

    return @lines;
}

my $DNA1     = "TRET.txt";
my @protein1 = read_sequence_lines($DNA1);

my $DNA2     = "VEGFA.txt";
my @protein2 = read_sequence_lines($DNA2);

my $DNA3     = "Propreinsulin.txt";
my @protein3 = read_sequence_lines($DNA3);

# Concatenate the lines of each input into a single sequence string.
my ( $pro1, $pro2, $pro3 )
    = map { join "", @{$_} } ( \@protein1, \@protein2, \@protein3 );

# Echo every sequence, prefixed with its label, followed by a blank line.
for my $entry (
    [ "TRET : ",          $pro1 ],
    [ "VEGFA : ",         $pro2 ],
    [ "Propreinsulin : ", $pro3 ],
    )
{
    my ( $label, $sequence ) = @{$entry};
    print $label, $sequence, "\n";
}
print "\n";

# Length of each sequence, in characters.
my ( $length1, $length2, $length3 ) = map { length } ( $pro1, $pro2, $pro3 );

print "The length of TRET sequence : $length1.\n";
print "The length of VEGFA sequence : $length2.\n";
print "The length of Propreinsulin sequence : $length3.\n";

# Arithmetic mean of the three sequence lengths.
my $average = ( $length1 + $length2 + $length3 ) / 3;
print "The average length of multi-sequences : $average.\n";

print "\n";

# Notes on the counting idioms used below:
#   - tr/AT/BC/ replaces A with B and T with C, one character for one
#     character.  With an empty replacement list (tr/Aa//) nothing is
#     changed and the number of matching characters is returned.
#   - A global match (/g) in list context returns every match; assigning
#     that list to "()" in scalar context yields the number of matches,
#     which is a plain 0 when nothing matches.

# Print the base statistics of one sequence.
#
# Parameters:
#   $index    - number shown in the "<proteinN>" heading.
#   $sequence - sequence string to analyse.
# Output (to STDOUT):
#   The counts of A, C, G and T, their sum, the number of "GC" and "AT"
#   dinucleotide occurrences, and each dinucleotide count as a percentage
#   of the total base count.  Counting is case-insensitive; characters
#   other than A, C, G and T are ignored.
sub print_base_statistics {
    my ( $index, $sequence ) = @_;

    my $count_a = ( $sequence =~ tr/Aa// );
    my $count_c = ( $sequence =~ tr/Cc// );
    my $count_g = ( $sequence =~ tr/Gg// );
    my $count_t = ( $sequence =~ tr/Tt// );
    my $total   = $count_a + $count_c + $count_g + $count_t;

    print "<protein", $index, ">\n";
    print "The number of A is $count_a.\n";
    print "The number of C is $count_c.\n";
    print "The number of G is $count_g.\n";
    print "The number of T is $count_t.\n";
    print "Total bases in DNA : $total.\n";

    # Count of GC dinucleotides.
    my $gc_pairs = () = $sequence =~ /GC/gi;
    print "The total number of dinucleotide GC in DNA : $gc_pairs.\n";

    # Count of AT dinucleotides.
    my $at_pairs = () = $sequence =~ /AT/gi;
    print "The total number of dinucleotide AT in DNA : $at_pairs.\n";

    # Percentage of GC and AT.  A sequence without any A/C/G/T would make
    # the divisor zero, so it is reported as 0 instead of aborting the run.
    # NOTE(review): these figures are dinucleotide occurrences relative to
    # the total base count, not the conventional GC content (G+C)/total --
    # confirm which statistic the assignment expects.
    my $gc_percent = $total ? $gc_pairs / $total * 100 : 0;
    print "The percentage of GC: $gc_percent%.\n";

    my $at_percent = $total ? $at_pairs / $total * 100 : 0;
    print "The percentage of AT: $at_percent%.\n";

    print "\n";

    return;
}

# Report every sequence in input order, numbered from 1.
my @sequences = ( $pro1, $pro2, $pro3 );
for my $index ( 1 .. scalar @sequences ) {
    print_base_statistics( $index, $sequences[ $index - 1 ] );
}

# The script proper ends here.
exit;

# NOTE(review): the statements below follow the unconditional exit above and
# are therefore never executed.  They read one line from a FASTA filehandle
# for which no open() is visible in this file, split that line into single
# characters and count the 'A' characters.  Confirm whether this leftover
# snippet is still needed.
my $file = <FASTA>;
my @R    = split "", $file;

# grep in scalar context returns the number of elements equal to 'A'.
my $A = grep { $_ eq 'A' } @R;

Anonymous user

imported>YoungKwang Jung

Biolecture.org β

Changes

Homework 6. In a multi-sequence FASTA file, produce statistics such as sequence number, average seq length, GC content, AT content, etc

Biolecture.org ^β