|
Download this file.
Feb 22, 17:50 - Ping: Pretty nice. Please make indentation consistent (the indentation is 2, 3, or 4 spaces in different places; always use 4).
Feb 24, 16:17 - Jacob: sorry :( I'll try to stop writing code in pico.
Please log in if you would like to add comments. | |
# Spell-check report for input.txt.
#
# Reads input.txt line by line, strips punctuation, splits each line on single
# spaces, and counts every word-shaped token for which speling.isindict()
# returns a false value.  The tally is then printed in sorted order, one entry
# per line, as "word (count)".
import re

import speling

# Compiled once up front; the patterns never change between lines or words.
# Everything that is not a letter, digit, space, apostrophe, hyphen or slash
# is deleted before tokenising (this includes the trailing newline and tabs).
punct = re.compile(r"[^-/A-Za-z'0-9 ]")
# A checkable word starts with a letter and continues with letters or
# apostrophes only, so tokens containing digits, '-' or '/' are skipped.
valid = re.compile(r"^[A-Za-z][A-Za-z']*$")

# Maps each unrecognised word to the number of times it was seen.
bad_words = {}

in_words = open("input.txt")
# try/finally (rather than `with`) keeps the script free of syntax newer than
# what the rest of the file uses, while still closing the file on any error.
try:
    for line in in_words:
        line = punct.sub("", line)
        # Split on single spaces: consecutive spaces yield empty tokens,
        # which the `valid` check below rejects.
        for word in line.split(" "):
            if not valid.search(word):
                continue
            if speling.isindict(word):
                continue
            bad_words[word] = bad_words.get(word, 0) + 1
finally:
    in_words.close()

# list(...) + sort() behaves the same on Python 2 (where items() is already a
# list) and on Python 3 (where it is a view without a sort method).
items = list(bad_words.items())
items.sort()
for word, count in items:
    # Parenthesised single-argument print is valid in both Python 2 and 3.
    print("%s (%d)" % (word, count))
|