Source code

From Biolecture.org
Revision as of 04:08, 20 May 2016 by imported>Baik BuKyung (Created page with "<div>import random</div> <div> </div> <div>def random_sequence():<br />     SL = open('sequence_list.txt','w')<br />     numb...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
import random
 
def random_sequence():
    """Interactively build a file of random nucleotide sequences.

    Prompts for the number of genes, then for each gene prompts for a name
    and a length. Each gene is written to ``sequence_list.txt`` (overwriting
    any existing file) as two lines: ``>`` followed by the name, then a
    sequence of that many random bases obtained from ``random_base()``.

    Raises:
        ValueError: if a count or length typed by the user is not an integer.
    """
    # The context manager guarantees the file is closed even when one of the
    # int(input()) conversions below raises on malformed input.
    with open('sequence_list.txt', 'w') as SL:
        number = int(input('How many genes do you want?: '))
        for _ in range(number):
            gene_name = input('input gene name: ')
            SL.write('>' + gene_name + '\n')
            print('input the length of ', gene_name, ': ', end='')
            length = int(input())
            # join avoids rebuilding the string once per base.
            sequence = ''.join(random_base() for _ in range(length))
            SL.write(sequence + '\n')

    return
 

def random_base():
    """Return one randomly chosen nucleotide letter: 'A', 'T', 'G' or 'C'.

    A single integer is drawn with ``random.randrange(1, 5)`` and mapped
    1 -> 'A', 2 -> 'T', 3 -> 'G', 4 -> 'C'.
    """
    base_by_draw = {1: 'A', 2: 'T', 3: 'G', 4: 'C'}
    draw = random.randrange(1, 5)
    return base_by_draw[draw]
 
def reverse():
    """Write the reverse complement of every record in ``sequence_list.txt``.

    The input file is read as alternating lines: even-indexed lines are
    headers, odd-indexed lines are sequences. For each header/sequence pair,
    two lines are written to ``reverse_list.txt`` (overwriting any existing
    file): the header with its last character removed and ``_rev`` appended,
    then the reverse complement of the sequence (A<->T, G<->C).

    Characters other than 'A', 'T', 'G' and 'C' (including the trailing
    newline) are dropped from the output sequence. A trailing header with no
    sequence line after it is ignored.
    """
    complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}

    # Both files are managed by one `with` so neither handle leaks if
    # reading or writing fails part-way through.
    with open('sequence_list.txt', 'r') as SL, \
            open('reverse_list.txt', 'w') as RL:
        read = SL.readlines()

        for header, sequence in zip(read[0::2], read[1::2]):
            # Walk the sequence backwards and complement each base in a
            # single pass instead of prepending to a growing string.
            rev_seq = ''.join(
                complement[base]
                for base in reversed(sequence)
                if base in complement
            )
            # header[:-1] removes the newline that readlines() keeps.
            RL.write(header[:-1] + '_rev\n')
            RL.write(rev_seq + '\n')

    return
   
   
def ORF_seq(name, seq):
    """Find open reading frames in ``seq`` and write them to a file.

    For every ``ATG`` in ``seq``, the nearest downstream stop codon
    (``TGA``, ``TAA`` or ``TAG``) whose offset from that ``ATG`` is a
    multiple of three is located. Each such start/stop pair is written to
    ``<name[1:-1]>_ORF.txt`` as a ``>ORF_<k>`` header line followed by the
    subsequence from the start codon through the stop codon inclusive.
    Start codons with no in-frame stop produce no output.

    Args:
        name: header line of the record; its first and last characters are
            stripped to form the output file name (as produced by
            ``readlines()`` on a ``>name`` line, i.e. leading ``>`` and
            trailing newline).
        seq: nucleotide sequence to scan.
    """
    file_name = name[1:-1]
    stop_codons = ('TGA', 'TAA', 'TAG')

    # Single scan collecting start and stop positions in ascending order.
    ini_cod = []
    ter_cod = []
    for i in range(len(seq) - 2):
        codon = seq[i:i + 3]
        if codon == 'ATG':
            ini_cod.append(i)
        elif codon in stop_codons:
            ter_cod.append(i)

    # `with` actually closes the file; the previous code referenced
    # ORF_file.close without calling it.
    with open(file_name + '_ORF.txt', 'w') as ORF_file:
        p = 0
        for start in ini_cod:
            for stop in ter_cod:
                if stop > start and (stop - start) % 3 == 0:
                    p += 1
                    ORF_file.write(">ORF_" + str(p) + "\n")
                    ORF_file.write(seq[start:stop + 3] + '\n')
                    # Only the first in-frame stop terminates this ORF.
                    break

    return
 
def ORF():
    """Prompt for a sequence file and run ``ORF_seq`` on each record in it.

    The file is read as alternating lines: even-indexed lines are headers,
    odd-indexed lines are sequences. ``ORF_seq(header, sequence)`` is called
    once per header/sequence pair, in file order. A trailing header with no
    sequence line after it is ignored.
    """
    file_name = input('input file name to find ORF in directory: ')

    # Read everything up front so the input handle is closed before the
    # per-record processing starts, and is released even if that fails.
    with open(file_name, 'r') as fseq:
        gene_list = fseq.readlines()

    for name, seq in zip(gene_list[0::2], gene_list[1::2]):
        ORF_seq(name, seq)

    return

   
def TRANS():
    """Prompt for an ORF file and write its amino-acid translation.

    The input file is read as alternating lines: even-indexed lines are
    headers, odd-indexed lines are nucleotide sequences. The output file is
    named by dropping the last four characters of the input name and
    appending ``_trans.txt``. For each input line one output line is written:
    headers are replaced by ``>ORF_<k>`` (k counting records from 1), and
    sequences are translated codon by codon with the table below, stop
    codons being written as the literal text ``STOP``.

    Raises:
        KeyError: if a sequence line contains a triplet that is not in the
            codon table (e.g. an incomplete trailing codon).
    """
    file_name = input('input ORF file name to translate in directory: ')

    codon_table = {
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        # TCC is serine like the other TCx codons; it was previously "s".
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y", "TAA": "STOP", "TAG": "STOP",
        "TGT": "C", "TGC": "C", "TGA": "STOP", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    }

    # One `with` for both files so neither handle leaks when a lookup in
    # the codon table raises part-way through.
    with open(file_name, 'r') as fseq, \
            open(file_name[:-4] + '_trans.txt', 'w') as trans_file:
        seq = fseq.readlines()

        for i, line in enumerate(seq):
            if i % 2 == 1:
                # len(line) - 1 skips the trailing newline kept by
                # readlines().
                seq_line = ''.join(
                    codon_table[line[j:j + 3]]
                    for j in range(0, len(line) - 1, 3)
                )
            else:
                seq_line = ">ORF_" + str(i // 2 + 1)
            trans_file.write(seq_line + '\n')

    return