Next: Programmazione dinamica
Up: Espressioni regolari e ricerca
Previous: Esercizio 1
  Indice
''' Endonuclease contains the REXENZ class'''
def rebase2regex(pat):
    '''Return the compiled regular expression for a restriction enzyme pattern.

    pat is a recognition pattern written with the nucleotide letters
    A, C, G, T, the ambiguity codes listed below, and an optional '^'
    marking the cleavage position.  The '^' marker is removed, every
    ambiguity code is expanded into a character class, and the result is
    compiled with re.IGNORECASE.  Characters that are neither '^' nor an
    upper-case ambiguity code are copied unchanged into the expression.
    '''
    import re

    # Ambiguity code -> character class of the bases it stands for.
    ambiguity = {
        'R': '[GA]',    # G or A
        'Y': '[CT]',    # C or T
        'M': '[AC]',    # A or C
        'K': '[GT]',    # G or T (was wrongly expanded to [GC])
        'S': '[GC]',    # G or C
        'W': '[AT]',    # A or T
        'B': '[CGT]',   # not A
        'D': '[AGT]',   # not C
        'H': '[ACT]',   # not G
        'V': '[ACG]',   # not T
        'N': '[ACGT]',  # any base
    }
    # Single pass over the pattern: the expansions only contain A, C, G, T
    # and brackets, so they can never be expanded a second time.
    pattern = ''.join([ambiguity.get(ch, ch) for ch in pat.replace('^', '')])
    return re.compile(pattern, re.IGNORECASE)
def cut_dna(setlist, seq):
    '''Split seq at the positions in setlist and return the fragments.

    setlist is a list of cut positions (indices into seq) in increasing
    order.  A leading position 0 is ignored because nothing lies before
    it.  The result contains the slice between each pair of consecutive
    cuts, starting at index 0, followed by the remainder of seq after
    the last cut.  When there is no effective cut (empty setlist, or only
    a cut at 0) the whole sequence is returned as a single fragment.
    '''
    # Guard the index so an empty cut list no longer raises IndexError.
    if setlist and setlist[0] == 0:  # nothing before
        setlist = setlist[1:]
    # Fragment boundaries: start of the sequence, then every cut position.
    bounds = [0] + list(setlist)
    retseqs = [seq[start:stop] for start, stop in zip(bounds, bounds[1:])]
    # Tail after the last cut (the whole sequence when there are no cuts).
    retseqs.append(seq[bounds[-1]:])
    return retseqs
##############################
class REXENZ:
    '''A restriction enzyme described by a name and a recognition pattern.

    Attributes:
      name     -- the enzyme name, stored as given.
      pat      -- the recognition pattern, stored as given; a '^' in the
                  pattern marks the cleavage position.
      clivepos -- index of '^' inside pat, or 0 when pat has no '^'
                  (cleavage position unknown).

    The pattern is compiled with rebase2regex() and kept in a private
    attribute; it is None while no pattern has been set.
    '''

    def __init__(self, name='', pat=''):
        ''' __init__(self, name='',pat='') '''
        self.name = name
        # setpat() always assigns pat, clivepos and the compiled regex,
        # so the digest methods also work on an enzyme without a pattern.
        self.setpat(pat)

    def setname(self, name):
        ''' setname(self,name): replace the enzyme name. '''
        self.name = name

    def setpat(self, pat):
        ''' setpat(self,pat): replace the pattern and recompute clivepos.

        An empty pattern resets clivepos to 0 and discards any previously
        compiled regular expression.
        '''
        self.pat = pat
        if pat:
            # find() returns -1 when there is no '^'; 0 then means unknown.
            self.clivepos = max(pat.find('^'), 0)
            self.__rex = rebase2regex(pat)
        else:
            self.clivepos = 0  # unknown
            self.__rex = None

    def digeststart(self, seq):
        ''' digeststart(self,seq): start indices of the recognition sites.

        Returns the start index in seq of every non-overlapping match of
        the pattern, scanning left to right.  Returns an empty list when
        no pattern has been set.
        '''
        if self.__rex is None:
            return []
        # finditer() resumes after the end of each match, which is what
        # the former search-and-slice loop did, without copying seq.
        return [m.start() for m in self.__rex.finditer(seq)]

    def digest(self, seq):
        ''' digest(self,seq): cleavage positions of this enzyme in seq.

        Each position is a site start plus clivepos; positions that are
        not strictly inside seq (>= len(seq)) are dropped.
        '''
        seqlen = len(seq)
        offset = self.clivepos
        return [start + offset
                for start in self.digeststart(seq)
                if start + offset < seqlen]

    def doubledigest(self, enz, seq):
        ''' doubledigest(self,enz,seq): cleavage positions of two enzymes.

        enz is another object providing digest(seq).  Returns the
        positions produced by either enzyme, in increasing order, with a
        position shared by both listed once.
        '''
        # Both digest() results are strictly increasing, so a sorted
        # union gives the same list as merging them while skipping ties.
        return sorted(set(self.digest(seq)) | set(enz.digest(seq)))

    def digested_dna(self, seq):
        ''' digested_dna(self,seq): fragments of seq cut by this enzyme. '''
        return cut_dna(self.digest(seq), seq)

    def doubledigested_dna(self, enz, seq):
        ''' doubledigested_dna(self,enz,seq): fragments of seq cut by both
        this enzyme and enz. '''
        return cut_dna(self.doubledigest(enz, seq), seq)
2004-11-02