"""Report words from ``input.txt`` that ``speling.isindict`` rejects.

Each line of ``input.txt`` is stripped of every character other than
ASCII letters, digits, space, ``-``, ``/`` and ``'``, then split on single
spaces.  Tokens made up only of letters and apostrophes (starting with a
letter) are passed to ``speling.isindict``; tokens it rejects are counted.
The result is printed one word per line as ``word (count)``, sorted by
word.
"""
import speling
import re

# The patterns never change, so compile them once instead of once per
# line / per token.
# NOTE(review): disallowed characters (including tabs and the trailing
# newline) are deleted, not replaced by a space, so "foo,bar" is checked as
# the single token "foobar" -- confirm this is intended.
punct = re.compile("[^-/A-Za-z'0-9 ]")
tokenizer = re.compile(" ")
# Tokens containing digits, '-' or '/' (and empty tokens produced by
# consecutive spaces) do not match and are skipped without a lookup.
valid = re.compile("^[A-Za-z][A-Za-z']*$")

# Maps each rejected word to the number of times it occurred.
bad_words = {}

in_words = open("input.txt")
try:
    for line in in_words:
        line = punct.sub("", line)
        tokens = tokenizer.split(line)
        for word in tokens:
            if not valid.search(word):
                continue
            # presumably isindict() is truthy for known words -- verify
            # against the speling module.
            if speling.isindict(word):
                continue
            bad_words[word] = bad_words.get(word, 0) + 1
finally:
    # Release the file handle even if the dictionary lookup or a regex
    # call raises part-way through the input.
    in_words.close()

# list(...) + sort() works whether items() returns a list (Python 2) or a
# view (Python 3); the (word, count) pairs sort by word because every word
# is unique.
items = list(bad_words.items())
items.sort()
for item in items:
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(item[0] + " (" + str(item[1]) + ")")