""" cipherclasses.py
    c odenthal
    2005/05/30

    This module contains classes designed to be aids in
    solving cryptograms.
"""

from __future__ import division
from string import maketrans

import plottools, stringtools, counts, ciphertools, datatools, printtools
#import polyalphabetic

# Blank substitution tables that every CipherText starts from (and is
# reset to): each letter is paired with the placeholder "_", which the
# class treats as "no guess yet".
# NOTE(review): assumes stringtools.UPPERCASE / LOWERCASE each hold the
# 26 letters, matching the "_"*26 they are zipped with -- confirm.
cipher2plain = dict(zip(stringtools.UPPERCASE,"_"*26))
plain2cipher = dict(zip(stringtools.LOWERCASE,"_"*26))
# Translation table sending every character of UPPERCASE to "_".
startTable = maketrans(stringtools.UPPERCASE,"_"*26)

class CipherText(str):

    def __init__(self,S=""):
        """ Sets up the working state for the cipher text 'S'.

            Attributes created here:
              cipher2plain -- copy of the blank cipher -> plain table
              plain2cipher -- copy of the blank plain -> cipher table
              cipherText   -- upper-cased text passed through
                              'stringtools.stripPunc'
              plainText    -- current decipherment (set by 'decode')
        """
        # The string value itself is fixed by str.__new__. Forwarding
        # 'S' to the base __init__ ends up in object.__init__, which
        # warns (2.6+) or raises TypeError (3.x) on extra arguments,
        # so nothing is passed along.
        str.__init__(self)
        self.cipher2plain = cipher2plain.copy()
        self.plain2cipher = plain2cipher.copy()
        self.cipherText = stringtools.stripPunc(self.upper())
        self.decode()

    def __repr__(self):
        """ Shows the cleaned cipher text as the pieces returned by
            'stringtools.breakIntoWords' (called with 'lngth=5'),
            separated by single spaces.
        """
        groups = stringtools.breakIntoWords(self.cipherText,lngth=5)
        return " ".join(groups)

    def __str__(self):
        """ Returns the current plain text guess and the cipher text
            laid out by 'printtools.formatRows', preceded by a newline.
        """
        rows = (self.plainText, self.cipherText)
        layout = printtools.formatRows(rows, l=5, ll=80, s=1)
        return "\n" + layout

    def __len__(self):
        """ The length reported is that of the cleaned 'cipherText'
            attribute, not of the raw string the object was built from.
        """
        cleaned = self.cipherText
        return len(cleaned)

    def decode(self):
        """ Rebuilds the translation table from the current guesses
            and uses it to regenerate 'plainText' from 'cipherText'.
        """
        self.__makeTable()
        table = self.transTable
        self.plainText = self.cipherText.translate(table)

    def __makeTable(self):
        """ Builds 'transTable', the decipherment table that turns
            cipher text into plain text, from 'cipher2plain'.
        """
        mapping = self.cipher2plain
        # keys() and values() of an unmodified dict line up pairwise.
        cipherChars = "".join(mapping.keys())
        plainChars = "".join(mapping.values())
        self.transTable = maketrans(cipherChars,plainChars)

    def __map(self,CLett,pLett):
        """ Records the guess that cipher letter 'CLett' stands for
            plain letter 'pLett' and then re-decodes the text.

            Any earlier guess involving either letter is dropped so
            that 'cipher2plain' and 'plain2cipher' stay inverses of
            each other. Raises KeyError if 'pLett' (lower-cased) is
            not a key of 'plain2cipher'.
        """
        C = CLett.upper()
        p = pLett.lower()
        blank = "_"

        # Release the cipher letter that 'p' was previously tied to.
        # When 'p' was unassigned there is nothing to release (and we
        # must not create a "_" entry in 'cipher2plain').
        oldC = self.plain2cipher[p]
        if oldC != blank:
            self.cipher2plain[oldC] = blank

        # Release the plain letter that 'C' previously decoded to,
        # otherwise 'plain2cipher' would keep a stale pointer to 'C'.
        oldp = self.cipher2plain.get(C,blank)
        if oldp != blank:
            self.plain2cipher[oldp] = blank

        self.plain2cipher[p] = C
        self.cipher2plain[C] = p
        self.decode()

    def __unmap(self,C):
        """ Forgets the current guess for cipher letter 'C' and then
            re-decodes the text.

            Raises KeyError if 'C' (upper-cased) is not a key of
            'cipher2plain'.
        """
        C = C.upper()
        p = self.cipher2plain[C]
        # Only clear the reverse entry when there really was a guess;
        # otherwise a bogus "_" key would be added to 'plain2cipher'.
        if p != "_":
            self.plain2cipher[p] = "_"
        self.cipher2plain[C] = "_"
        self.decode()

    def update(self,CT,PT=""):
        """ This calls '__map' or '__unmap' possibly multiple
            times to update the current deciphering guess.

            The letters of 'PT' are assigned, position by position,
            to the leading letters of 'CT'. Any cipher letters of
            'CT' left over (all of them when 'PT' is omitted) are
            unmapped.

            Raises IndexError, before any guess is changed, when
            'PT' has more letters than 'CT'.
        """
        CT = list(CT)
        PT = list(PT)

        # Check up front so a bad call cannot leave a half-applied
        # set of guesses behind.
        if len(PT) > len(CT):
            raise IndexError(
                "update: %d plain letters given for only %d cipher letters"
                % (len(PT),len(CT)))

        for (C,p) in zip(CT,PT):
            self.__map(C,p)

        # The unpaired tail is unmapped from the end backwards.
        for C in reversed(CT[len(PT):]):
            self.__unmap(C)

    def tryShift(self,k,reverse=False):
        """ This tries to decode by setting the plain text
            equal to the cipher text alphabet shifted 'k'
            positions. Numerically this associates cipher
            text letter 'i' with plain text letter '(i+k) % 26'.
            All previous guesses are erased first.

            'k' may be any integer; it is reduced modulo 26.
            If 'reverse' is true the cipher alphabet is paired
            with the shifted alphabet in reverse ('Z' first).
        """
        self.reset()

        base = stringtools.UPPERCASE

        # Without the reduction, slicing with |k| >= 26 yields the
        # unshifted alphabet instead of the shift promised above.
        k = k % 26

        ct = base
        pt = base[k:] + base[:k]
        if reverse:
            ct = ct[::-1]

        self.update(ct,pt)

    def load(self,filename):
        """ Replaces 'cipherText' with the upper-cased, whitespace
            trimmed contents of the file 'filename' and re-decodes.

            NOTE(review): only the 'cipherText' attribute changes; the
            string value of the object itself is untouched, so methods
            that hand 'self' to the helper modules still see the text
            the object was created with.
        """
        # Close the file explicitly rather than leaving the handle to
        # the garbage collector.
        f = open(filename)
        try:
            raw = f.read()
        finally:
            f.close()

        self.cipherText = raw.upper().strip()
        self.decode()

    def reset(self):
        """ Throws away every guess: both tables go back to copies of
            the blank module-level tables and the text is re-decoded.
        """
        self.cipher2plain = dict(cipher2plain)
        self.plain2cipher = dict(plain2cipher)
        self.decode()

    def letterFreq(self):
        """ Gets the letter frequencies of the cipher text.

            Returns whatever 'stringtools.letterFreq' produces for
            this string (other methods here use it as a dict keyed
            by letter).
        """
        return stringtools.letterFreq(self)

    def plotLetterFreq(self):
        """ Plots the letter frequencies of the cipher text.

            A text histogram (letter, count, row of '*') is always
            printed; a graphical plot is attempted afterwards and
            quietly skipped if plotting fails.
        """
        # Get the letter frequencies and include zeros.
        frqdct = self.letterFreq()
        for c in stringtools.UPPERCASE:
            frqdct.setdefault(c,0)

        # Convert to a list sorted by letter, not frequency.
        frqlst = datatools.sortByKey(frqdct)

        # Display the results in the interpreter.
        for (c,f) in frqlst:
            print("%s  %3d  %s" % (c,f,"*"*f))

        # Display the results in a plot if available. Plotting is
        # best-effort, but only ordinary errors are ignored so that
        # KeyboardInterrupt / SystemExit still get through.
        try:
            plottools.lstPlot([f for (c,f) in frqlst])
        except Exception:
            pass

    def printLetterPerc(self,ref=counts.pridePrejLetter,refLabel="Austen"):
        """ Prints the cipher letter percentages, sorted by value,
            next to the reference table 'ref' under a two line
            heading. 'refLabel' names the reference in the heading.
        """
        percList = datatools.sortByValue(stringtools.letterPerc(self))
        dataStr = printtools.formatDataInCols((percList,ref))

        # Column widths of the left (cipher) and right (reference)
        # heading cells.
        leftW, rightW = 30, 18
        topLine = "Cipher letter".center(leftW) + \
                  (refLabel + " letter").center(rightW)
        subLine = "percentages".center(leftW) + \
                  "percentages".center(rightW)

        print("\n" + topLine + "\n" + subLine + "\n" + "\n" + dataStr)

    def printDigraphPerc(self,
            ref  = counts.pridePrejDigraph,
            refR = counts.pridePrejDigraphReverse,
            refLabel="Austen"):
        """ Prints the first 20 cipher digraphs (after sorting by
            value), each one's reversal, and the reference tables
            'ref' and 'refR' side by side under a two line heading.
        """
        dDct = stringtools.digraphPerc(self)
        dLst = datatools.sortByValue(dDct)[:20]

        # Pair every listed digraph with its reversal; a reversal
        # that never occurs counts as 0.0.
        dRLst = [(dg[::-1], dDct.get(dg[::-1],0.0)) for (dg,perc) in dLst]

        dataStr = printtools.formatDataInCols((dLst,dRLst,ref,refR))

        leftW, rightW = 54, 40
        topLine = "Cipher digraph".center(leftW) + \
                  (refLabel + " digraph").center(rightW)
        subLine = "percentages".center(leftW) + \
                  "percentages".center(rightW)

        print("\n" + topLine + "\n" + subLine + "\n" + "\n" + dataStr)

    def printAllDigraphs(self,letters=""):
        """ Prints a square table of digraph values for the cipher
            text. Rows and columns follow 'letters' (upper-cased,
            completed with the rest of the alphabet, duplicates
            removed). Each entry is the cipher value minus the
            reference value from 'counts.senseDigraphPerc', times 50
            and truncated to an integer. The reference values are
            always taken in 'abc' order, whatever 'letters' is.
        """
        standard = stringtools.UPPERCASE

        # Reference digraph table, in 'abc' order.
        eDct = counts.senseDigraphPerc
        eLst = []
        for a in standard:
            eLst.append([eDct.get(a+b,0) for b in standard])

        # Cipher digraph table, in 'letters' order.
        order = stringtools.stripDups(letters.upper() + standard)
        cDct = stringtools.digraphPerc(self)
        cLst = []
        for a in order:
            cLst.append([cDct.get(a+b,0) for b in order])

        # Header row of column letters.
        print(" "*4 + "".join([b.rjust(4) for b in order]) + "\n")

        # One row per letter: scaled cipher-minus-reference values.
        for (i,a) in enumerate(order):
            cells = []
            for (x,y) in zip(cLst[i],eLst[i]):
                cells.append(str(int(50*(x-y))).rjust(4))
            print(a.center(5) + "".join(cells))

    def showLowFreqContacts(self,lowFreq=0.04,spaces=1):
        """ For every cipher letter that is not low frequency, counts
            how often it stands directly before or directly after a
            low frequency letter in 'cipherText', then prints the
            letters and counts, highest count first.

            A letter is low frequency when its count is below
            'lowFreq' times the total letter count (that cutoff is
            printed first). 'spaces' sets the gap between columns.
        """
        alphabet = stringtools.UPPERCASE
        freqDict = self.letterFreq()
        tallies = [freqDict.get(ch,0) for ch in alphabet]
        cutOff = lowFreq*sum(tallies)

        print(cutOff)
        isLow = dict(zip(alphabet,[n<cutOff for n in tallies]))

        # Walk over neighbouring pairs once. Characters missing from
        # 'isLow' are never counted and never count as a contact.
        CT = self.cipherText
        contacts = {}
        for (left,right) in zip(CT[:-1],CT[1:]):
            # 'right' is preceded by a low frequency letter.
            if not isLow.get(right,True) and isLow.get(left,False):
                contacts[right] = contacts.get(right,0) + 1
            # 'left' is followed by a low frequency letter.
            if not isLow.get(left,True) and isLow.get(right,False):
                contacts[left] = contacts.get(left,0) + 1

        ranked = sorted([(n,c) for (c,n) in contacts.items()],reverse=True)

        # Two character wide cells so letters and counts line up.
        chars = [" "+c for (n,c) in ranked]
        freqs = [str(n).rjust(2) for (n,c) in ranked]

        s = " "*spaces
        print("\n" + "CT   " + s.join(chars) + "\n" + "freq " + s.join(freqs))

    def printAlphabets(self,spaces=1):
        """ Prints both substitution tables: the plain alphabet over
            its cipher letters, then the cipher alphabet over its
            plain letters. 'spaces' sets the gap between letters.
        """
        s = " "*spaces
        plainAlpha = stringtools.LOWERCASE
        cipherAlpha = stringtools.UPPERCASE

        cipherForPlain = s.join([self.plain2cipher[ch] for ch in plainAlpha])
        plainForCipher = s.join([self.cipher2plain[ch] for ch in cipherAlpha])

        lines = [
            "",
            "pt  " + s.join(list(plainAlpha)),
            "CT  " + cipherForPlain,
            "",
            "CT  " + s.join(list(cipherAlpha)),
            "pt  " + plainForCipher,
        ]
        print("\n".join(lines))

    def printCipherFreq(self,spaces=1):
        """ Prints the cipher letters and their counts, ordered from
            the most to the least frequent. 'spaces' sets the gap
            between columns.
        """
        freqDict = self.letterFreq()
        ranked = sorted(
            [(freqDict.get(ch,0),ch) for ch in stringtools.UPPERCASE],
            reverse=True)

        s = " "*spaces
        charRow = s.join([c.rjust(2) for (f,c) in ranked])
        freqRow = s.join([str(f).rjust(2) for (f,c) in ranked])

        print("\n".join(["", "CTxt  " + charRow, "freq  " + freqRow]))

    #------------------------------------------------------

    def monoPhi(self):
        """ Returns the phi statistic of the cipher text, as computed
            by 'ciphertools.phi'. Phi is defined as
                sum_{1:26} m_i(m_i - 1) / M(M-1)
            where m_i is the number of occurrences of the
            ith letter and M is the total number of letters.
            If this is a monoalphabetic cipher, phi should
            be around 0.065.
        """
        return ciphertools.phi(self)

    def monoPsi(self):
        """ Returns the psi statistic of the cipher text, as computed
            by 'ciphertools.psi'. Psi is defined as
                sum_{1:26} (m_i / M)**2
            where m_i is the number of occurrences of the
            ith letter and M is the total number of letters.
            If this is a monoalphabetic cipher, psi should
            be around 0.065 + 0.035/M.
        """
        return ciphertools.psi(self)

    def periodKasiski(self,k):
        """ Find the repeated substrings in the cipher text
            of length k.

            The work is done by 'ciphertools.testKasiski'; this
            method itself returns nothing.
        """
        ciphertools.testKasiski(self,k)

    def periodKappa(self,k):
        """ Plots the result of running the 'kappa' test on the cipher text
            against shifted versions of the cipher text. If the cipher is
            a periodic polyalphabetic cipher, this should be around 0.065
            when 'i' is a multiple of the period. This computes kappa for
            all shifts 'i' in 'range(1,k+1)'.

            A text histogram (shift, kappa, row of '*') is always printed;
            a graphical plot is attempted afterwards and quietly skipped
            if plotting fails.
        """
        # Compute kappa for each shift 'i'.
        kappaLst = [ ciphertools.kappaTest(self,i) for i in range(1,k+1) ]

        # Display the results in the interpreter, one '*' per 0.001.
        for (i,ka) in enumerate(kappaLst):
            n = int(1000*ka)
            print("%2d  %0.4f  %s" % (i+1,ka,"*"*n))

        # Display the results in a plot if available. Plotting is
        # best-effort, but only ordinary errors are ignored so that
        # KeyboardInterrupt / SystemExit still get through.
        try:
            plottools.lstPlot(kappaLst)
        except Exception:
            pass

    def periodPhi(self,k):
        """ For each 'i' in 'range(1,k+1)' this computes the average of the
            results of running the 'phi' test on each of the column texts
            obtained from writing the cipher text in 'depth' with 'i' columns.
            If the cipher is a periodic polyalphabetic cipher, this should
            be around 0.065 when the number of columns ('i') is a multiple
            of the period. The results for each 'i' are then plotted.

            A text histogram (columns, average phi, row of '*') is always
            printed; a graphical plot is attempted afterwards and quietly
            skipped if plotting fails.
        """
        # Compute the average phi for each 'i'.
        phiLst = []
        for i in range(1,k+1):
            colLst = stringtools.breakIntoCols(self,i)
            avgPhi = ciphertools.phiAvg(colLst)
            phiLst.append(avgPhi)

        # Display the results in the interpreter, one '*' per 0.001.
        for (i,pa) in enumerate(phiLst):
            n = int(1000*pa)
            print("%2d  %0.4f  %s" % (i+1,pa,"*"*n))

        # Display the results in a plot if available. Plotting is
        # best-effort, but only ordinary errors are ignored so that
        # KeyboardInterrupt / SystemExit still get through.
        try:
            plottools.lstPlot(phiLst)
        except Exception:
            pass

    def __shiftChi(self,d,bLP):
        """ Prints a table of chi values: the base probability list
            'bLP' against every one of the 26 rotations of the letter
            probabilities of each of the 'd' columns obtained by
            writing the cipher text in 'depth'. Rows are shifts,
            columns are text columns, values are shown times 100.
        """
        alphabet = stringtools.UPPERCASE

        # One list of 26 chi values per column of the text.
        perColumn = []
        for column in stringtools.breakIntoCols(self,d):

            # Letter probabilities of the column, with absent letters
            # filled in as 0, reduced to a plain list in 'abc' order.
            probs = stringtools.letterProb(column)
            for letter in alphabet:
                probs.setdefault(letter,0)
            ordered = [v for (key,v) in datatools.sortByKey(probs)]

            chis = []
            for s in range(26):
                rotated = ordered[s:] + ordered[:s]
                chis.append(sum([p*q for (p,q) in zip(bLP,rotated)]))
            perColumn.append(chis)

        # Transpose so that each record holds one shift for all columns.
        perShift = zip(*perColumn)

        # Header.
        print("")
        print("COLUMNS".center(6*d))
        heads = ["%2s" % i for i in range(d)]
        print("SHIFTS    " + "      ".join(heads) + "\n")

        # Body, as percents.
        for (i,rec) in enumerate(perShift):
            cells = ["%5.2f" % (100*x,) for x in rec]
            print(("  %2s    " % i) + "   ".join(cells))

    def shiftStandardChi(self,d):
        """ Prints the chi table (see '__shiftChi') of the reference
            letter probabilities 'counts.englishLetterProb' against
            all shifts of each of the 'd' columns obtained by writing
            the cipher text in 'depth'.
        """
        # Reference probabilities as a bare list in 'abc' order.
        ordered = datatools.sortByKey(counts.englishLetterProb)
        eLP = [prob for (letter,prob) in ordered]

        self.__shiftChi(d,eLP)

    def shiftMutualChi(self,d,k):
        """ Prints the chi table (see '__shiftChi') of the letter
            probabilities of column 'k' against all shifts of each of
            the 'd' columns obtained by writing the cipher text in
            'depth'.
        """
        # Letter probabilities of the base column 'k' ...
        baseCol = stringtools.breakIntoCols(self,d)[k]
        probs = stringtools.letterProb(baseCol)

        # ... with absent letters filled in as 0 ...
        for letter in stringtools.UPPERCASE:
            probs.setdefault(letter,0)

        # ... as a bare list in 'abc' order.
        bLP = [prob for (letter,prob) in datatools.sortByKey(probs)]

        self.__shiftChi(d,bLP)

    def allStandardShift(self,d,letters=False):
        """ Computes chi of the reference letter probabilities
            ('counts.englishLetterProb') against all 26 shifts of
            each of the 'd' columns obtained by writing the cipher
            text in 'depth'.

            For every column the shifts with large chi are printed
            (best first, one line per column) together with chi
            times 100. With 'letters' true a shift is shown as a
            letter instead of a number.

            Returns a list holding, for each column, the shift with
            the largest chi.
        """
        alphabet = stringtools.UPPERCASE

        # Break the cipher text into 'd' columns.
        colLst = stringtools.breakIntoCols(self,d)

        # Reference probabilities as a bare list in 'abc' order.
        eLP = [v for (key,v) in datatools.sortByKey(counts.englishLetterProb)]

        # How a shift is shown in the printout.
        if letters:
            label = lambda idx : alphabet[idx]
        else:
            label = lambda idx : str(idx).rjust(2)

        maxLst = []
        shftLst = []
        for col in colLst:

            # Column probabilities, zero filled, in 'abc' order.
            chPrb = stringtools.letterProb(col)
            for c in alphabet:
                chPrb.setdefault(c,0)
            chPrb = [v for (key,v) in datatools.sortByKey(chPrb)]

            # Chi for every shift of the column.
            sLst = []
            for s in range(26):
                sLP = chPrb[s:] + chPrb[:s]
                sLst.append(sum([p*q for (p,q) in zip(eLP,sLP)]))

            # Record the position of the (first) maximum.
            maxLst.append(sLst.index(max(sLst)))

            # Lower the cutoff in steps until something qualifies.
            cutoff = 0.100
            picked = []
            while not picked:
                picked = [(x,j) for (j,x) in enumerate(sLst) if x >= cutoff]
                cutoff -= 0.005

            # A lone candidate gets one more, already lowered, cutoff
            # so that close runners-up are listed as well.
            if len(picked) == 1:
                picked = [(x,j) for (j,x) in enumerate(sLst) if x >= cutoff]

            # Best chi first.
            picked.sort()
            picked.reverse()

            shftLst.append(
                [label(j).center(4) + ("%2.2f" % (100*x,)) for (x,j) in picked])

        # Now display all the potential shifts.
        print("")
        for shft in shftLst:
            print("\t".join(shft))

        return maxLst

    def __oneMutualShift(self,d,k):
        """ Computes chi of the letter probabilities of column 'k'
            against all 26 shifts of each of the 'd' columns obtained
            by writing the cipher text in 'depth'.

            Returns one list per column holding the shifts whose chi
            is large (at least one shift per column).
        """
        alphabet = stringtools.UPPERCASE

        # Break the cipher text into 'd' columns.
        colLst = stringtools.breakIntoCols(self,d)

        # Base column probabilities, zero filled, in 'abc' order.
        baseProbs = stringtools.letterProb(colLst[k])
        for letter in alphabet:
            baseProbs.setdefault(letter,0)
        bLP = [v for (key,v) in datatools.sortByKey(baseProbs)]

        shftLst = []
        for col in colLst:

            # Column probabilities, zero filled, in 'abc' order.
            chPrb = stringtools.letterProb(col)
            for letter in alphabet:
                chPrb.setdefault(letter,0)
            chPrb = [v for (key,v) in datatools.sortByKey(chPrb)]

            # Chi for every shift of the column.
            sLst = []
            for s in range(26):
                sLP = chPrb[s:] + chPrb[:s]
                sLst.append(sum([p*q for (p,q) in zip(bLP,sLP)]))

            # Lower the cutoff in steps until something qualifies.
            cutoff = 0.100
            picked = []
            while not picked:
                picked = [j for (j,x) in enumerate(sLst) if x >= cutoff]
                cutoff -= 0.005

            # A lone candidate gets one more, already lowered, cutoff
            # so that close runners-up are listed as well.
            if len(picked) == 1:
                picked = [j for (j,x) in enumerate(sLst) if x >= cutoff]

            # Add these indices to the record.
            shftLst.append(picked)

        return shftLst

    def allMutualShift(self,d):
        """ Runs '__oneMutualShift' with every column 'k' in
            'range(d)' as the base column and prints the results as
            a table: one row per base column, one cell per compared
            column, each cell listing the selected shifts separated
            by commas. Nothing is returned.
        """
        recLst = [ self.__oneMutualShift(d,k) for k in range(d) ]

        # Header: the column numbers.
        print("")
        heads = ["%2s" % i for i in range(d)]
        print(" "*12 + "        ".join(heads) + "\n")

        # One row per base column.
        for (i,rec) in enumerate(recLst):
            cells = ["%10s" % (",".join(map(str,x)),) for x in rec]
            print(("%2s  " % i) + "".join(cells))

    def shiftColumns(self,shftLst):
        """ Writes the cipher text in depth using
            d = len(shftLst) columns, shifts the alphabet of the
            'ith' column by shftLst[i] places, and returns the
            re-assembled text as a new 'CipherText'.
        """
        d = len(shftLst)

        # Break the cipher text into columns; the length of the
        # first one is the length every column is padded to below.
        rawCols = stringtools.breakIntoCols(self,d)
        D = len(rawCols[0])

        # Each column becomes a 'CipherText' of its own ...
        colTexts = [CipherText(col) for col in rawCols]

        # ... and is decoded with its own shift.
        for (i,shift) in enumerate(shftLst):
            colTexts[i].tryShift((26-shift)%26)

        # The plain text of each column is the new cipher text.
        # Pad the columns to equal length, glue them together, cut
        # the result into 'D' columns and glue those together again:
        # a round about way of reading the columns off row by row.
        padded = []
        for colText in colTexts:
            shifted = colText.plainText
            padded.append(shifted + " "*(D-len(shifted)))

        rowLst = stringtools.breakIntoCols("".join(padded),D)

        # Put the rows together into the monoalphabetic text.
        return CipherText("".join(rowLst))