| 1 | #!/usr/bin/env python
|
| 2 | # Calvin Smith
|
| 3 | # Craft assignment 3, part 3
|
| 4 | # 2/09/03
|
| 5 |
|
| 6 | """Read text and check it against a dictionary, printing
|
| 7 | the misspelled words and how many times they are misspelled.
|
| 8 | Use the following format to run the script: python spelcheck.py < input.txt
|
| 9 | """
|
| 10 |
|
| 11 | import sys
|
| 12 |
|
# Words accepted as correctly spelled.  Starts empty; loaddictionary()
# rebinds it to the lines of the dictionary file, and checkword() searches it.
dictionary = []
|
| 14 |
|
def loaddictionary(dictionaryfilename):
    """Set the dictionary to be used.

    Reads the file named by ``dictionaryfilename`` and rebinds the
    module-level ``dictionary`` to a list holding each line of the file
    with its newline character removed.  Any words loaded earlier are
    discarded.

    Errors raised while opening or reading the file propagate to the
    caller; in that case ``dictionary`` is left unchanged.
    """
    global dictionary
    # The with-block closes the file even when reading fails part-way;
    # previously the handle was only closed on the success path.
    with open(dictionaryfilename, 'r') as f:
        dictionary = [line.replace('\n', '') for line in f]
|
| 22 |
|
def addword(word):
    """Add a word to the dictionary.

    Appends ``word`` to the module-level ``dictionary`` list unless an
    identical entry is already present, so repeated calls do not create
    duplicates.  The comparison is case-sensitive.  Returns None.

    The change affects only the in-memory list; nothing is written to disk.
    """
    # The list is mutated in place, so no ``global`` statement is needed.
    if word not in dictionary:
        dictionary.append(word)
|
| 26 |
|
def deleteword(word):
    """Delete a word from the dictionary.

    Removes every entry equal to ``word`` (case-sensitive) from the
    module-level ``dictionary`` list.  Deleting a word that is not present
    is a silent no-op.  Returns None.

    The change affects only the in-memory list; nothing is written to disk.
    """
    # Slice assignment edits the existing list object in place, so no
    # ``global`` statement is needed and other references to the list see
    # the removal too.
    dictionary[:] = [entry for entry in dictionary if entry != word]
|
| 30 |
|
def checktext(text):
    """Spellcheck a chunk of text.

    The text is passed through removenonwordchars(), split on whitespace,
    and every resulting word is tested with checkword().

    Returns a dictionary of misspelled words, with the key being the word
    exactly as it appeared in the text and the value being how many times
    it is misspelled.  If no errors, an empty dictionary is returned.
    """
    mispeled = {}
    # split() with no argument already treats newlines as separators, so no
    # separate newline-to-space conversion is required beforehand.
    for word in removenonwordchars(text).split():
        if checkword(word) is not None:
            mispeled[word] = mispeled.get(word, 0) + 1
    return mispeled
|
| 49 |
|
def removenonwordchars(text):
    """Return text with digits and the listed punctuation marks deleted.

    Characters are dropped outright rather than replaced by a space, so the
    letters on either side of a removed symbol are joined together.
    Whitespace, apostrophes and any symbol not in the list are kept.
    """
    unwanted = '()[].,-?"&$#@!%^*_=+/\\;:0123456789'
    kept = [char for char in text if char not in unwanted]
    return ''.join(kept)
|
| 55 |
|
def checkword(word):
    """Spellcheck an individual word.

    A word is accepted when the module-level ``dictionary`` contains it
    either exactly as given or in its lower-cased form, so a capitalised
    word still matches a lower-case dictionary entry.

    Return the misspelled word or None.
    """
    # Membership tests replace the former index()/ValueError probing; the
    # exact spelling is tried first, then the lower-cased one.
    if word in dictionary or word.lower() in dictionary:
        return None
    return word
|
| 72 |
|
| 73 |
|
def main(text):
    """Spellcheck text against dict.txt and print the misspelled words.

    Loads the word list from the file 'dict.txt' (resolved against the
    current working directory), checks ``text`` with checktext(), and
    prints one line per distinct misspelled word in sorted order, formatted
    as ``word (count)``.  Prints nothing when there are no misspellings.
    """
    loaddictionary('dict.txt')
    mispeled = checktext(text)
    # sorted() accepts the mapping directly, so this no longer depends on
    # keys() returning a sortable list.
    for word in sorted(mispeled):
        # A single pre-formatted string prints identically whether print is
        # a statement or a function.
        print('%s (%s)' % (word, mispeled[word]))
|
| 81 |
|
if __name__ == "__main__":
    # Run as a script: spellcheck everything supplied on standard input.
    stdin_text = sys.stdin.read()
    main(stdin_text)