A Python script to crudely tally the number of times a space-delimited word appears in a text file. Uses a Counter object to track the number of occurrences for each word.

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 26 20:04:11 2012

Returns the most-common space-delimited words in a file.

@author: robert
"""
from collections import Counter
import re

def openfile(filename):
    """Return the entire contents of ``filename`` as a single string.

    The file is opened read-only (the previous ``"r+"`` mode required write
    permission even though nothing was written) and is closed as soon as
    the contents have been read.

    Raises whatever ``open`` raises if the file cannot be opened for
    reading (e.g. it does not exist).
    """
    # The context manager guarantees the handle is released even if
    # read() raises.
    with open(filename, "r") as fh:
        return fh.read()

def removegarbage(str):
    """Normalise raw text so it can be split into words.

    Every run of one or more characters matched by the regex ``\\W``
    (anything that is not a letter, digit or underscore) is collapsed
    into a single space, and the result is lower-cased.

    Returns the normalised string; the input is not modified.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept
    # because it is part of the function's signature.
    collapsed = re.compile(r'\W+').sub(' ', str)
    return collapsed.lower()

def getwordbins(words):
    """Count how many times each item occurs in ``words``.

    ``words`` is any iterable of hashable items. Returns a
    ``collections.Counter`` mapping each distinct item to its number of
    occurrences.
    """
    # Wrapping in iter() makes Counter tally element by element for every
    # input type; a mapping passed directly would have its values taken
    # as ready-made counts instead.
    return Counter(iter(words))

def main(filename, topwords):
    """Print the ``topwords`` most frequent words in ``filename``.

    The file is read with ``openfile``, normalised with ``removegarbage``
    (punctuation collapsed to spaces, lower-cased), split into words and
    tallied with ``getwordbins``. Each result is printed on its own line
    as ``word count``, most frequent first.

    filename -- path of the text file to analyse.
    topwords -- maximum number of distinct words to print.
    """
    txt = removegarbage(openfile(filename))
    # split() with no argument discards empty strings. split(' ') produced
    # a bogus '' "word" whenever the normalised text began or ended with a
    # space, e.g. for any file ending in a newline.
    words = txt.split()
    bins = getwordbins(words)
    for key, value in bins.most_common(topwords):
        # A single pre-formatted string prints "word count" identically
        # under Python 2 and Python 3.
        print("%s %s" % (key, value))

# Run the tally only when this file is executed as a script, so that
# importing the module for its helper functions does not try to read
# 'speech.txt' or print anything.
if __name__ == "__main__":
    main('speech.txt', 500)

Example output:

the 235
and 161
to 132
of 125
that 101
a 91
in 83
we 70
is 54
our 40
who 39
for 39
people 37
not 36
are 32
on 29
it 28
be 28
their 27
must 26
have 25
those 25
will 25
with 25
as 22
world 21
this 19
all 18
america 18
because 18
from 17
they 17
i 17
an 17
