""" cipherclasses.py c odenthal 2005/05/30 This module contains classes designed to be aids in solving cryptograms. """ from __future__ import division from string import maketrans import plottools, stringtools, counts, ciphertools, datatools, printtools #import polyalphabetic cipher2plain = dict(zip(stringtools.UPPERCASE,"_"*26)) plain2cipher = dict(zip(stringtools.LOWERCASE,"_"*26)) startTable = maketrans(stringtools.UPPERCASE,"_"*26) class CipherText(str): def __init__(self,S=""): str.__init__(self,S) self.cipher2plain = cipher2plain.copy() self.plain2cipher = plain2cipher.copy() self.cipherText = stringtools.stripPunc(self.upper()) self.decode() def __repr__(self): return " ".join(stringtools.breakIntoWords(self.cipherText,lngth=5)) def __str__(self): return "\n"+printtools.formatRows( (self.plainText, self.cipherText), l = 5, ll=80, s=1) def __len__(self): return len(self.cipherText) def decode(self): self.__makeTable() self.plainText = self.cipherText.translate(self.transTable) def __makeTable(self): """ This makes a decipherment table that will translate the cipher text to plain text. """ cp = self.cipher2plain.items() ct = "".join([c for c,p in cp]) pt = "".join([p for c,p in cp]) self.transTable = maketrans(ct,pt) def __map(self,CLett,pLett): """ Decodes the cipher text 'Ctext' as plain text 'ptext'. """ C = CLett.upper() p = pLett.lower() oldC, self.plain2cipher[p] = self.plain2cipher[p], C self.cipher2plain[oldC] = "_" self.cipher2plain[C] = p self.decode() def __unmap(self,C): C = C.upper() p = self.cipher2plain[C] self.plain2cipher[p] = "_" self.cipher2plain[C] = "_" self.decode() def update(self,CT,PT=""): """ This calls '__map' or '__unmap' possibly multiple times to update the current deciphering guess. If no 'PT' is used, then the ciphertext 'CT' is unmapped. Otherwise, the plain text 'PT' is assigned to the cipher text 'CT'. """ CT = list(CT) PT = list(PT) while PT: self.__map(CT.pop(0),PT.pop(0)) while CT: self.__unmap(CT.pop()) def tryShift(self,k,reverse=False): """ This trys to decode by setting the plain text equal to the cipher text alphabet shifted 'k' positions. Numerically this associates cipher text letter 'i' with plain text letter 'i+k % 26'. """ self.reset() base = stringtools.UPPERCASE ct = base pt = base[k:] + base[:k] if reverse: ct = ct[::-1] self.update(ct,pt) def load(self,filename): self.cipherText = open(filename).read().upper().strip() self.decode() def reset(self): """ Erases any and all guesses about the plaintext. """ self.cipher2plain = cipher2plain.copy() self.plain2cipher = plain2cipher.copy() self.decode() def letterFreq(self): """ Gets the letter fequencies of the cipher text. """ return stringtools.letterFreq(self) def plotLetterFreq(self): """ Plots the letter frequencies of the cipher text. """ # Get the letter frequencies and include zeros. frqdct = self.letterFreq() for c in stringtools.UPPERCASE: frqdct.setdefault(c,0) # Convert to a list sorted by letter, not frequency. frqlst = datatools.sortByKey(frqdct) # Display the results in the interpreter. for (c,f) in frqlst: print "%s %3d %s" % (c,f,"*"*f) # Display the results in a plot if available. try: plottools.lstPlot([f for (c,f) in frqlst]) except: pass def printLetterPerc(self,ref=counts.pridePrejLetter,refLabel="Austen"): """ Displays the percentages of the cipher letters occurences. """ percList = datatools.sortByValue( stringtools.letterPerc(self)) dataStr = printtools.formatDataInCols((percList,ref)) lhead1 = "Cipher letter" lhead2 = "percentages" rhead1 = refLabel + " letter" rhead2 = "percentages" headStr1 = "\n"+lhead1.center(30) + rhead1.center(18) + "\n" headStr2 = lhead2.center(30) + rhead2.center(18) + "\n" print headStr1+headStr2+"\n"+dataStr def printDigraphPerc(self, ref = counts.pridePrejDigraph, refR = counts.pridePrejDigraphReverse, refLabel="Austen"): """ Displays the percentages of the cipher digraph occurences. """ dDct = stringtools.digraphPerc(self) dLst = datatools.sortByValue(dDct)[:20] dRLst = [] for (k,v) in dLst: rk = k[::-1] dRLst.append((rk,dDct.get(rk,0.0))) dataStr = printtools.formatDataInCols((dLst,dRLst,ref,refR)) lhead1 = "Cipher digraph" lhead2 = "percentages" rhead1 = refLabel + " digraph" rhead2 = "percentages" headStr1 = "\n"+lhead1.center(54) + rhead1.center(40) + "\n" headStr2 = lhead2.center(54) + rhead2.center(40) + "\n" print headStr1+headStr2+"\n"+dataStr def printAllDigraphs(self,letters=""): """ Displays the frequency per thousand of all digraphs. The order of display is governed by 'letters'. The entries will have the frequencies of digraphs in English subtracted from them. These standard frequencies will be displayed in 'abc' order. """ # First get the digraph percentages for standard English # and put them in an array in 'abc' order. standard = stringtools.UPPERCASE eDct = counts.senseDigraphPerc eLst = [[eDct.get(a+b,0) for b in standard] for a in standard] # Now get the digraph percentages for the cipher text # and put them in an array in 'letters' order. letters = letters.upper() + standard letters = stringtools.stripDups(letters) cDct = stringtools.digraphPerc(self) cLst = [[cDct.get(a+b,0) for b in letters] for a in letters] # First a header print " "*4 + "".join([ b.rjust(4) for b in letters]) + "\n" # The entries will be scaled differences between frequencies # of digraphs in standard english and in the cipher text. frmt = lambda x,y : str(int(50*(x-y))).rjust(4) for i,a in enumerate(letters): dLst = [frmt(x,y) for (x,y) in zip(cLst[i],eLst[i])] print a.center(5)+"".join(dLst) def showLowFreqContacts(self,lowFreq=0.04,spaces=1): """ Shows the contact numbers that the characters of the cipher text have with the low frequency characters. """ freqDict = self.letterFreq() freqList = [freqDict.get(char,0) for char in stringtools.UPPERCASE] totaLett = sum(freqList) cutOff = lowFreq*totaLett print cutOff lowFreqList = [count= cutoff] cutoff -= 0.005 if len(shft) == 1: shft = [(x,i) for (i,x) in enumerate(sLst) if x >= cutoff] shft.sort() shft.reverse() frmt = lambda x : "%2.2f" % (100*x,) if letters: lett = lambda i : stringtools.UPPERCASE[i] else: lett = lambda i : str(i).rjust(2) shft = [lett(i).center(4)+frmt(x) for (x,i) in shft] shftLst.append(shft) # Now display all the potential shifts. print for shft in shftLst: print "\t".join(shft) return maxLst def __oneMutualShift(self,d,k): """ Computes chi of the alphabet in column 'k' against all the possible shifts of each the 'd' columns obtained by writing the cipher text in 'depth'. For each column a list of shifts that provide large chi are selected. A list of these shift lists is returned. """ # Break the cipher text into 'd' columns. colLst = stringtools.breakIntoCols(self,d) # Get the letter probability dictionary for the base column 'k'. bLP = stringtools.letterProb(colLst[k]) # Make sure 'bLP.keys' includes all letters. for c in stringtools.UPPERCASE: bLP.setdefault(c,0) # Put them in a list in 'abc' order. bLP = datatools.sortByKey(bLP) # Drop the keys so we have just the probabilities in the list. bLP = [v for (k,v) in bLP] shftLst = [] for col in colLst: # Get the letter probability dictionary for the column. chPrb = stringtools.letterProb(col) # Make sure 'chPrb.keys' includes all letters. for c in stringtools.UPPERCASE: chPrb.setdefault(c,0) # Put them in a list in 'abc' order. chPrb = datatools.sortByKey(chPrb) # Drop the keys so we have just the probabilities in the list. chPrb = [v for (k,v) in chPrb] sLst = [] # Now consider each 'shift' in turn. for s in range(26): # Shift. sLP = chPrb[s:] + chPrb[:s] # Compute chi. schi = sum([p*q for (p,q) in zip (bLP,sLP)]) sLst.append(schi) # Compute the large values and find their index. cutoff = 0.100 shft = [] while len(shft) == 0: shft = [i for (i,x) in enumerate(sLst) if x >= cutoff] cutoff -= 0.005 if len(shft) == 1: shft = [i for (i,x) in enumerate(sLst) if x >= cutoff] # Add these indices to the record. shftLst.append(shft) return shftLst def allMutualShift(self,d): """ Computes chi of the alphabet in each column 'k' (k in range(d)) against all the possible shifts of each the 'd' columns obtained by writing the cipher text in 'depth'. For each column the shift providing maximum chi is selected. A list of these shifts is returned. """ recLst = [ self.__oneMutualShift(d,k) for k in range(d) ] # Print header head = lambda i : "%2s" % i print print " "*12 + " ".join(map(head,range(d))) + "\n" # Print out the results frmt = lambda x : "%10s" % (",".join(map(str,x)),) for (i,rec) in enumerate(recLst): label = "%2s " % i print label + "".join(map(frmt,rec)) def shiftColumns(self,shftLst): """ This writes the cipher text in depth using d = len(shftLst) columns. Then each column has its alphabet shifted the indicated number of places, i.e. the 'ith' column is shifted shftLst[i] places. """ d = len(shftLst) # Break the cipher text into columns. colLst = stringtools.breakIntoCols(self,d) # We'll need the length of the columns. D = len(colLst[0]) # Now make each column a 'CiperText' object. colLst = map(CipherText,colLst) # Apply the shifts to the column cipher texts for i in range(d): colLst[i].tryShift((26-shftLst[i])%26) # Concatenate the columns (their plain text, the # new cipher text) back into a string, split them # into 'D' columns, and concatentate them back into # a string again. Silly, but - at this point - it # beats thinking. colLst = [col.plainText for col in colLst] # We need to make all the columns the same length. colLst = [col + " "*(D-len(col)) for col in colLst] strng = "".join(colLst) rowLst = stringtools.breakIntoCols(strng,D) strng = "".join(rowLst) # Put the rows together into the monoalphabetic text. return CipherText(strng)