1 | #!/usr/bin/env python
|
2 | # Calvin Smith
|
3 | # Craft assignment 3, part 3
|
4 | # 2/09/03
|
5 |
|
6 | """Read text and check it against a dictionary, printing
|
7 | the misspelled words and how many times they are misspelled.
|
8 | Use the following format to run the script: python spelcheck.py < input.txt
|
9 | """
|
10 |
|
11 | import sys
|
12 |
|
# Module-level word list: rebound by loaddictionary() and searched by
# checkword().  Starts empty until a dictionary file has been loaded.
dictionary = []
|
14 |
|
def loaddictionary(dictionaryfilename):
    """Set the dictionary to be used.

    Reads the file named by dictionaryfilename and rebinds the module-level
    ``dictionary`` list so that each line of the file, with its line ending
    removed, becomes one entry.

    Raises IOError (OSError on Python 3) if the file cannot be opened.
    """
    global dictionary
    # The with-block guarantees the file is closed even if reading fails.
    with open(dictionaryfilename, 'r') as f:
        # Strip '\r' too, so word lists saved with CRLF endings still match.
        dictionary = [line.rstrip('\r\n') for line in f]
|
22 |
|
def addword(word):
    """Add a word to the dictionary.

    Appends word to the module-level ``dictionary`` list so that later
    lookups accept it.  A word that is already present is not added again.
    """
    # The list is mutated in place (never rebound), so no ``global``
    # statement is required here.
    if word not in dictionary:
        dictionary.append(word)
|
26 |
|
def deleteword(word):
    """Delete a word from the dictionary.

    Removes every occurrence of word from the module-level ``dictionary``
    list.  Deleting a word that is not present does nothing.
    """
    # Slice assignment mutates the existing list in place; filtering handles
    # both duplicate entries and a missing word without raising.
    dictionary[:] = [entry for entry in dictionary if entry != word]
|
30 |
|
def checktext(text):
    """Spellcheck a chunk of text.

    Non-word characters are stripped with removenonwordchars(), the result
    is split on whitespace, and each word is passed to checkword().

    Returns a dict of misspelled words, with the key being the word as it
    appears in the text and the value being how many times it is
    misspelled.  If there are no errors, an empty dict is returned.
    """
    mispeled = {}
    text = text.replace('\n', ' ')
    text = removenonwordchars(text)
    for word in text.split():
        # checkword() returns None for a known word and the word otherwise.
        if checkword(word) is not None:
            mispeled[word] = mispeled.get(word, 0) + 1
    return mispeled
|
49 |
|
def removenonwordchars(text):
    """Strip digits and the listed punctuation symbols out of text.

    Unwanted characters are deleted rather than replaced by spaces, so two
    words separated only by such a symbol end up joined together.
    """
    unwanted = '()[].,-?"&$#@!%^*_=+/\\;:0123456789'
    return ''.join(ch for ch in text if ch not in unwanted)
|
55 |
|
def checkword(word):
    """Spellcheck an individual word.

    The word is looked up in the module-level ``dictionary`` exactly as
    given and, failing that, in lowercase, so capitalised forms of
    lowercase entries are accepted.

    Return the misspelled word, or None if the word is in the dictionary.
    """
    # Plain membership tests replace the index()/ValueError control flow;
    # the dictionary is only read, so no ``global`` statement is needed.
    if word in dictionary or word.lower() in dictionary:
        return None
    return word
|
72 |
|
73 |
|
def main(text, dictionaryfilename='dict.txt'):
    """Spellcheck text and print a report of the misspelled words.

    Loads the word list from dictionaryfilename (default 'dict.txt', a
    path relative to the current working directory), checks text against
    it, and prints one line per misspelled word in sorted order, formatted
    as ``word (count)``.
    """
    loaddictionary(dictionaryfilename)
    mispeled = checktext(text)
    # sorted() works on Python 2 and 3; dict.keys() has no sort() on 3.
    for word in sorted(mispeled):
        # A single pre-formatted argument prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print('%s (%s)' % (word, mispeled[word]))
|
81 |
|
if __name__ == "__main__":
    # Script entry point: spellcheck everything supplied on standard input.
    stdin_text = sys.stdin.read()
    main(stdin_text)