next up previous contents
Next: Programmazione dinamica Up: Espressioni regolari e ricerca Previous: Esercizio 1   Indice

Enzimi di restrizione

''' Endonuclese contains the REXENZ class'''

def rebase2regex(pat):
    ''' rebase2regex(pat) returns the compiled, case-insensitive regular
        expression defined by pat, where pat is the restriction enzyme
        pattern written with one-letter nucleotide codes.

        The cleavage marker '^' is dropped and every ambiguity code
        (R, Y, M, K, S, W, B, D, H, V, N, in upper or lower case) is
        expanded into the character class of the bases it stands for.
        Any other character is copied into the expression unchanged.
    '''
    import re

    # ambiguity code -> character class of the bases it can stand for
    ambiguity = {
        'R': '[GA]',    # G or A
        'Y': '[CT]',    # C or T
        'M': '[AC]',    # A or C
        'K': '[GT]',    # G or T
        'S': '[GC]',    # G or C
        'W': '[AT]',    # A or T
        'B': '[CGT]',   # not A
        'D': '[AGT]',   # not C
        'H': '[ACT]',   # not G
        'V': '[ACG]',   # not T
        'N': '[ACGT]',  # any base
    }

    # Translate character by character: a single pass, so text produced by
    # one expansion can never be expanded a second time.
    pieces = [ambiguity.get(ch.upper(), ch) for ch in pat if ch != '^']

    return re.compile(''.join(pieces), re.IGNORECASE)
    
def cut_dna(setlist,seq):
    ''' cut_dna(setlist,seq) returns the list of fragments obtained by
        cutting seq at the positions listed in setlist.

        setlist -- cut positions (indexes into seq) in increasing order
        seq     -- the sequence to cut

        A leading position 0 is ignored, since nothing lies before it.
        When no cut position is left (setlist empty, or only a 0) the
        result is an empty list.
    '''
    # An empty list used to fail on setlist[0]; treat it as "no cuts".
    if not setlist:
        return []
    if setlist[0] == 0:  # nothing before
        setlist = setlist[1:]
    if not setlist:
        return []

    # Each fragment runs from one cut (or the start of seq) up to the
    # next cut (or the end of seq).
    starts = [0] + list(setlist)
    ends = list(setlist) + [len(seq)]
    return [seq[begin:end] for begin, end in zip(starts, ends)]
    

##############################

class REXENZ:
    ''' REXENZ(name, pat) describes a restriction enzyme: a name and a
        recognition pattern in which '^' marks the cleavage point.

        Attributes:
          name     -- the enzyme name
          pat      -- the recognition pattern as given (with its '^', if any)
          clivepos -- index of '^' in pat, i.e. the offset of the cut from
                      the start of a recognition site; 0 when unknown
    '''

    def __init__(self, name='',pat=''):
        ''' __init__(self, name='',pat='') '''
        self.name=name
        self.__setpattern(pat)

    def __setpattern(self,pat):
        ''' store pat and derive clivepos and the compiled regex from it
            (shared by __init__ and setpat)
        '''
        self.pat=pat
        self.clivepos=0    # unknown until a pattern containing '^' is given
        self.__rex=None    # no pattern: no regex, so nothing can be found
        if pat:
            # str.find gives -1 when there is no '^'; keep 0 in that case
            self.clivepos=max(pat.find('^'),0)
            self.__rex=rebase2regex(pat)

    def setname(self,name):
        ''' setname(self,name) '''
        self.name=name

    def setpat(self,pat):
        ''' setpat(self,pat) replaces the recognition pattern; an empty
            pattern clears the previously compiled expression as well
        '''
        self.__setpattern(pat)

    def digeststart(self,seq):
        ''' digeststart(self,seq) returns the start positions in seq of the
            successive, non-overlapping recognition sites (an empty list
            when the enzyme has no pattern)
        '''
        if self.__rex is None:
            return []
        # finditer scans seq once, resuming after the end of each match
        return [site.start() for site in self.__rex.finditer(seq)]

    def digest(self,seq):
        ''' digest(self,seq) returns the cut positions in seq: each site
            start shifted by clivepos, keeping only the positions that
            fall inside seq
        '''
        limit=len(seq)
        offset=self.clivepos
        return [start+offset for start in self.digeststart(seq)
                if start+offset < limit]

    def doubledigest(self,enz,seq):
        ''' doubledigest(self,enz,seq) returns the sorted cut positions of
            self and of the enzyme enz on seq; a position cut by both
            enzymes is listed once
        '''
        return sorted(set(self.digest(seq)).union(enz.digest(seq)))

    def __fragments(self,setlist,seq):
        ''' cut seq at the positions in setlist '''
        if not setlist:
            # no cut positions: report no fragments without calling
            # cut_dna on an empty list
            return []
        return cut_dna(setlist,seq)

    def digested_dna(self,seq):
        ''' digested_dna(self,seq) returns the fragments of seq produced
            by this enzyme (an empty list when it does not cut)
        '''
        return self.__fragments(self.digest(seq),seq)

    def doubledigested_dna(self,enz,seq):
        ''' doubledigested_dna(self,enz,seq) returns the fragments of seq
            produced by this enzyme together with the enzyme enz
            (an empty list when neither cuts)
        '''
        return self.__fragments(self.doubledigest(enz,seq),seq)


2004-11-02