Difference between revisions of "Minsu Kim/python/Rosalind"

From Biolecture.org
imported>Minsukim
imported>Minsukim
Line 120: Line 120:
  
 
<p>&nbsp;</p>
 
<p>&nbsp;</p>
 +
 +
<h2>Dynamic Program</h2>
 +
 +
<p>def maxvalue(a,b,c):<br />
 +
&nbsp; &nbsp; a=a+5<br />
 +
&nbsp; &nbsp; b=b-6<br />
 +
&nbsp; &nbsp; c=c-6<br />
 +
&nbsp; &nbsp; return max(a,b,c)<br />
 +
def maxvalue2(a,b,c):<br />
 +
&nbsp; &nbsp; a=a-2<br />
 +
&nbsp; &nbsp; b=b-6<br />
 +
&nbsp; &nbsp; c=c-6<br />
 +
&nbsp; &nbsp; return max(a,b,c)</p>
 +
 +
<p>def compare(a,b):<br />
 +
&nbsp; &nbsp; import numpy as np<br />
 +
&nbsp; &nbsp; scoring = np.zeros([len(a)+1,len(b)+1])<br />
 +
&nbsp; &nbsp; for i in range(0,len(a)+1):<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; scoring[i,0]=-6*i<br />
 +
&nbsp; &nbsp; for j in range(0,len(b)+1):<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; scoring[0,j]=-6*j</p>
 +
 +
<p>&nbsp; &nbsp; for row in range(1,len(a)+1):<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; for column in range(1,len(b)+1):<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; if b[column-1]==a[row-1]:<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; scoring[row,column] = maxvalue(scoring[row-1,column-1],scoring[row-1,column],scoring[row,column-1])<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; if b[column-1]!=a[row-1]:<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; scoring[row,column] = maxvalue2(scoring[row-1,column-1],scoring[row-1,column],scoring[row,column-1])<br />
 +
&nbsp; &nbsp; print(scoring)</p>
 +
 +
<p>&nbsp; &nbsp; list=[]<br />
 +
&nbsp; &nbsp; for row in range(1,len(a)+1):<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; for column in range(1,len(b)+1):<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; new=scoring[row,column]<br />
 +
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; list.append(new)<br />
 +
&nbsp; &nbsp; mxv=max(list)<br />
 +
&nbsp; &nbsp; print(mxv)</p>
 +
 +
<p><img alt="" src="/ckfinder/userfiles/images/bandicam%202017-12-12%2019-14-32-803.jpg" style="height:530px; width:640px" /></p>

Revision as of 19:17, 12 December 2017

I solve bioinformatics problems from the Rosalind website (rosalind.info).

 

function

def patterncount(text, pattern):
    """Count the start positions in ``text`` where ``pattern`` occurs.

    Every index of ``text`` is tried as a start position, so overlapping
    occurrences are each counted. Slices that run past the end of ``text``
    are shorter than ``pattern`` and therefore never match.

    Returns the number of matching positions as an int.
    """
    width = len(pattern)
    return sum(
        1
        for start in range(len(text))
        if text[start:start + width] == pattern
    )

def complementing(sequence):
    """Print the reverse complement of ``sequence``.

    The input is read back to front and each of the upper-case bases
    A, C, G, T is swapped for its partner (A<->T, C<->G). Any other
    character is passed through unchanged.

    The result is written to standard output; the function itself
    always returns 0.
    """
    partner = {'A': 'T', 'G': 'C', 'C': 'G', 'T': 'A'}
    flipped = [partner.get(base, base) for base in reversed(sequence)]
    print(''.join(flipped))
    return 0

Rosalind basics: transcription (RNA) and reverse complement (REVC)

# Rosalind "RNA": transcribe a DNA string into RNA.
# The original wrote `f.close` without parentheses, which never closed the
# file; the `with` block guarantees the handle is released.
with open('rosalind_rna.txt') as f:
    # Only the first line of the file is used. Surrounding whitespace
    # (including the trailing newline kept by readline) is removed so it
    # does not end up in the printed answer.
    a = f.readline().strip()

# Transcription: every 'T' becomes 'U'; all other characters are kept.
a = a.replace('T', 'U')

print(a)

# Rosalind "REVC": print the reverse complement of a DNA string.
# The original wrote `f.close` without parentheses, which never closed the
# file; the `with` block guarantees the handle is released.
with open('rosalind_revc2.txt', 'r') as f:
    # Only the first line is used. It is stripped because the newline kept
    # by readline would otherwise be reversed to the front of the result
    # and the output would start with a blank line.
    b = f.readline().strip()

# Reverse the string, then swap A<->T and C<->G in a single pass.
# Characters other than upper-case A, C, G, T are left unchanged.
b = b[::-1].translate(str.maketrans('ACGT', 'TGCA'))

print(b)

Computing GC Content

# Rosalind "GC": find the FASTA record with the highest GC fraction.

# Parse the FASTA file into {record name: full sequence}.
seq_list = dict()
with open('rosalind_gc.txt', 'r') as f_fa:
    for line in f_fa:
        if line.startswith('>'):
            # Header line: start a new, empty record.
            name = line.strip('>').strip()
            seq_list[name] = ''
        else:
            # Sequence line: append to the current record. The original
            # assigned here, so a record spanning several lines kept only
            # its last line.
            seq_list[name] += line.strip()

# GC fraction (0.0 - 1.0) for every record.
seq_CG = dict()
for k, s in seq_list.items():
    count = s.count('C') + s.count('G')
    # A header without any sequence would otherwise divide by zero.
    p = count / len(s) if s else 0.0
    seq_CG[k] = p

# (fraction, name) pairs so that max() orders by fraction first.
inverse = [(value, key) for key, value in seq_CG.items()]
# Bare expression: the result is displayed only in an interactive
# session or notebook; a plain script run discards it.
max(inverse)

import matplotlib.pyplot as plt

# Plot the cumulative (G count - C count) along the sequence stored in
# 'Ecoli_genome.fasta'.

# Collect every non-header line of the FASTA file. The `with` block
# closes the file as soon as reading is done, instead of holding it open
# until after plt.show() returns (or leaking it if an error occurs).
seq_list = []
with open('Ecoli_genome.fasta', 'r') as f_fa:
    for line in f_fa:
        if not line.startswith('>'):
            seq_list.append(line.strip())
tmp_seq = ''.join(seq_list)

# Running totals of G and C; count_list[i] is the difference after
# position i has been processed.
count_G = 0
count_C = 0
count_list = []
for tmp_n in tmp_seq:
    if tmp_n == 'G':
        count_G += 1
    elif tmp_n == 'C':
        count_C += 1
    count_list.append(count_G - count_C)

# x axis: 0-based position in the concatenated sequence.
pos_list = range(len(tmp_seq))

fig = plt.figure(figsize=(10, 6))
ax1 = fig.add_subplot(1, 1, 1)
ax1.plot(pos_list, count_list, 'b-')
ax1.grid()
plt.show()

 

Dynamic Programming

def maxvalue(a,b,c):
    """Return the largest of ``a + 5``, ``b - 6`` and ``c - 6``.

    ``compare`` calls this for a cell whose two characters are equal,
    passing the diagonal, upper and left neighbour scores as ``a``,
    ``b`` and ``c`` respectively.
    """
    return max(a + 5, b - 6, c - 6)
def maxvalue2(a,b,c):
    """Return the largest of ``a - 2``, ``b - 6`` and ``c - 6``.

    ``compare`` calls this for a cell whose two characters differ,
    passing the diagonal, upper and left neighbour scores as ``a``,
    ``b`` and ``c`` respectively.
    """
    return max(a - 2, b - 6, c - 6)

def compare(a,b):
    """Fill and print a dynamic-programming score table for ``a`` vs ``b``.

    The table has ``len(a) + 1`` rows and ``len(b) + 1`` columns. The first
    row and first column hold ``-6 * index``. Every other cell is computed
    from its diagonal, upper and left neighbours with ``maxvalue`` when the
    corresponding characters are equal and with ``maxvalue2`` otherwise.

    The full table is printed, followed by the largest value found in the
    interior cells (row >= 1 and column >= 1). Nothing is returned. If
    either input is empty there are no interior cells and ``max`` raises
    ``ValueError``.
    """
    import numpy as np

    n_rows = len(a) + 1
    n_cols = len(b) + 1
    scoring = np.zeros([n_rows, n_cols])

    # Borders: -6 per step along the first column and the first row.
    scoring[:, 0] = -6 * np.arange(n_rows)
    scoring[0, :] = -6 * np.arange(n_cols)

    for row, a_item in enumerate(a, start=1):
        for column, b_item in enumerate(b, start=1):
            # Equal characters use maxvalue, different ones maxvalue2.
            step = maxvalue if b_item == a_item else maxvalue2
            scoring[row, column] = step(
                scoring[row - 1, column - 1],
                scoring[row - 1, column],
                scoring[row, column - 1],
            )
    print(scoring)

    # Largest score among the interior cells, scanned row by row.
    mxv = max(scoring[1:, 1:].flat)
    print(mxv)