Source code
# Interactively build 'sequence_list.txt': for each gene the user names, write
# a '>name' header line followed by one line of random bases of the requested
# length.
# NOTE(review): random_base() is defined further down in this file; this
# section only runs once that definition has been executed — confirm the
# intended file layout.
with open('sequence_list.txt', 'w') as SL:  # closed even if input parsing fails
    number = int(input('How many genes do you want?: '))
    for n in range(number):
        gene_name = input('input gene name: ')
        SL.write('>' + gene_name + '\n')
        print('input the length of ', gene_name, ': ', end='')
        length = int(input())
        # join avoids rebuilding the string on every appended base
        sequence = ''.join(random_base() for _ in range(length))
        SL.write(sequence + '\n')
def random_base():
    """Return one DNA base ('A', 'T', 'G' or 'C') chosen uniformly at random."""
    # Imported locally because no module-level import of random is visible.
    import random

    # Same order as the original 1..4 mapping (A, T, G, then C for the
    # branch whose body was missing).
    return random.choice('ATGC')
# NOTE(review): the original `def` line and both loop headers were missing from
# this section; the function name and the optional path parameters are
# reconstructed — confirm against any callers.
def reverse_seq(src='sequence_list.txt', dst='reverse_list.txt'):
    """Write the reverse complement of every sequence in *src* to *dst*.

    *src* is read as alternating lines: even-indexed lines are headers and
    odd-indexed lines are sequences. For each pair, *dst* receives the header
    with ``_rev`` appended, then the reverse-complemented sequence.

    Characters other than A, T, G and C (including the trailing newline) are
    dropped from the sequence, as in the original if/elif chain.

    Returns None.
    """
    complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}

    with open(src, 'r') as SL:
        read = SL.readlines()
    rev_name = read[0::2]  # header lines
    rev = read[1::2]       # sequence lines

    with open(dst, 'w') as RL:
        # zip stops at the shorter list, so a trailing header without a
        # sequence line is ignored.
        for name, seq in zip(rev_name, rev):
            rev_seq = ''.join(
                complement[base] for base in reversed(seq) if base in complement
            )
            RL.write(name.rstrip('\n') + '_rev\n')
            RL.write(rev_seq + '\n')
def ORF_seq(name, seq):
    """Write every open reading frame found in *seq* to '<name>_ORF.txt'.

    *name* is a header line; a leading '>' and a trailing line break are
    removed to form the output file name. *seq* is scanned for start codons
    (ATG) and stop codons (TGA, TAA, TAG). Every (start, stop) pair where the
    stop lies downstream of the start and in the same reading frame is written
    as a '>ORF_<n>' header followed by the sub-sequence from the start codon
    through the stop codon. All in-frame stops are reported for each start,
    not only the first.

    Returns None.
    """
    # Strip the line break explicitly rather than chopping the last character,
    # so a header without a trailing newline keeps its full name.
    file_name = name.rstrip('\r\n')
    if file_name.startswith('>'):
        file_name = file_name[1:]

    stop_codons = ('TGA', 'TAA', 'TAG')
    ini_cod = []
    ter_cod = []
    for i in range(len(seq) - 2):
        codon = seq[i:i + 3]
        if codon == 'ATG':
            ini_cod.append(i)
        elif codon in stop_codons:
            ter_cod.append(i)

    with open(file_name + '_ORF.txt', 'w') as ORF_file:
        p = 0
        for start in ini_cod:
            for stop in ter_cod:
                if stop > start and (stop - start) % 3 == 0:
                    p += 1
                    ORF_file.write(">ORF_" + str(p) + "\n")
                    ORF_file.write(seq[start:stop + 3] + '\n')
    return
# Read a file of alternating header/sequence lines and run ORF_seq on each
# (header, sequence) pair.
file_name = input('input file name to find ORF in directory: ')
with open(file_name, 'r') as fseq:
    gene_list = fseq.readlines()

name_list = gene_list[0::2]  # even-indexed lines: headers
seq_list = gene_list[1::2]   # odd-indexed lines: sequences

# zip stops at the shorter list, so a trailing header without a sequence
# line is skipped instead of raising IndexError.
for gene_name, gene_seq in zip(name_list, seq_list):
    ORF_seq(gene_name, gene_seq)
def TRANS(file_name=None):
    """Translate the sequences of an ORF file into amino-acid strings.

    The input is read as alternating lines: even-indexed lines are headers and
    odd-indexed lines are DNA sequences. The output file is the input name
    with its extension replaced by '_trans.txt'. Each header is regenerated as
    '>ORF_<n>' from its position, and each sequence is translated codon by
    codon; stop codons are written as the literal text 'STOP'.

    Args:
        file_name: path of the ORF file. When None (the default) the user is
            prompted for it, as in the original interactive behaviour.

    Raises:
        KeyError: if a complete codon is not in the codon table (for example
            it contains a character other than A, T, G, C).

    Returns None.
    """
    # Imported locally because no module-level import of os is visible.
    import os.path

    codon_table = {
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y", "TAA": "STOP", "TAG": "STOP",
        "TGT": "C", "TGC": "C", "TGA": "STOP", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    }

    if file_name is None:
        file_name = input('input ORF file name to translate in directory: ')

    with open(file_name, 'r') as fseq:
        seq = fseq.readlines()

    # splitext drops whatever extension is present instead of blindly cutting
    # the last four characters.
    out_name = os.path.splitext(file_name)[0] + '_trans.txt'
    with open(out_name, 'w') as trans_file:
        for i, line in enumerate(seq):
            if i % 2 == 1:
                dna = line.rstrip('\r\n')
                # Only complete codons are translated; a trailing partial
                # codon is ignored.
                seq_line = ''.join(
                    codon_table[dna[j:j + 3]]
                    for j in range(0, len(dna) - 2, 3)
                )
            else:
                seq_line = ">ORF_" + str(i // 2 + 1)
            trans_file.write(seq_line + '\n')
    return