Download this file.
Feb 22, 17:50 - Ping: Pretty nice. Please make indentation consistent (the indentation is 2, 3, or 4 spaces in different places; always use 4).
Feb 24, 16:17 - Jacob: sorry :( I'll try to stop writing code in pico.
Please log in if you would like to add comments.
# Spell-check report script.
#
# Reads "input.txt" line by line, counts every candidate word that
# speling.isindict() rejects, and prints the rejected words in sorted
# order, one per line, formatted as "word (count)".
import re

import speling

# Both patterns are loop-invariant, so they are compiled once up front
# rather than once per line / once per word.
#
# Every character that is not a letter, digit, space, apostrophe, hyphen or
# slash is deleted outright (this also removes the trailing newline).
punct = re.compile(r"[^-/A-Za-z'0-9 ]")
# A candidate word starts with a letter and contains only letters and
# apostrophes; tokens containing digits, '-' or '/' are skipped unchecked.
valid = re.compile(r"^[A-Za-z][A-Za-z']*$")

bad_words = {}  # rejected word -> number of occurrences

# `with` closes the file even if the regex work or speling.isindict() raises.
with open("input.txt") as in_words:
    for line in in_words:
        # Split on single spaces, as the original tokenizer did; the empty
        # tokens produced by runs of spaces fail `valid` and are skipped.
        for word in punct.sub("", line).split(" "):
            if not valid.search(word):
                continue
            if not speling.isindict(word):
                bad_words[word] = bad_words.get(word, 0) + 1

# sorted() accepts both a Python 2 list and a Python 3 dict view, and a
# print with a single parenthesised expression is valid in both versions.
for word, count in sorted(bad_words.items()):
    print(word + " (" + str(count) + ")")