import re

# NOTE(review): this form asks Python for a submodule named `py` inside a
# package named `speling` and binds only the name `speling`.  The helpers used
# below (set_dictionary, spellcheck_text, count) are called unqualified, so
# confirm where they are actually expected to come from.
import speling.py

# Periods are deleted (not replaced by a space) so that abbreviations such as
# "U.S." collapse into the single token "US".
_ABBREVIATION_DOTS = re.compile('[.]')

# Tokens that start with digits, e.g. "20th" or "2004"; they are not words and
# are dropped from the text before spell checking.
_NUMERIC_TOKENS = re.compile(r'[0-9]+\w*')

# Remaining punctuation, newlines and digits.  Inside the character class the
# sequence "(--" is a range from "(" to "-", so it also covers ) * + and ,.
_PUNCTUATION = re.compile('[\n(--).,?";!:/$1234567890]')


def _tokenize(text):
    """Split raw speech text into a list of candidate words.

    The abbreviation and numeric-token clean-ups must run *before* the
    general punctuation pass: that pass turns every period and digit into a
    space, after which "U.S." and "20th" can no longer be recognised.
    """
    text = _ABBREVIATION_DOTS.sub('', text)
    text = _NUMERIC_TOKENS.sub(' ', text)
    text = _PUNCTUATION.sub(' ', text)
    return text.split()


def _collect_misspelled(words):
    """Return every word of *words* flagged by spellcheck_text, sorted.

    Presumably spellcheck_text(words, start) returns the index of the first
    misspelled word at or after *start*, or -1 when none is left (this is
    what the loop below relies on) -- confirm against its definition.
    """
    flagged = []
    # Start scanning at the first word; subsequent calls resume just past
    # the previous hit.
    position = spellcheck_text(words, 0)
    while position != -1:
        flagged.append(words[position])
        position = spellcheck_text(words, position + 1)
    flagged.sort()
    return flagged


def misspell():
    """Print the words of input.txt that are not in the dictionary.

    Loads the dictionary from 'dict.txt', reads the speech from 'input.txt',
    and prints one line per distinct misspelled word in the form
    "word (occurrences)", sorted alphabetically, followed by a blank
    separator.  Nothing is returned.
    """
    set_dictionary('dict.txt')

    # Close the file as soon as its contents have been read.
    with open('input.txt') as speech_file:
        text = speech_file.read()

    words = _tokenize(text)
    flagged = _collect_misspelled(words)

    # count() is expected to map each word to its number of occurrences
    # (per the original comments) -- confirm against its definition.
    occurrences = count(flagged)

    report = sorted(
        word + ' (' + str(occurrences[word]) + ')' for word in occurrences
    )
    for line in report:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(line)
    print('\n')