import re

# Counts occurrences of model strings such as '0 1 Z 0 0 1 1 0 in a text file:
# an apostrophe followed by eight single characters (each one either Z or a
# digit 0-9) separated by single spaces.  The distinct strings are printed with
# their counts, most frequent first, followed by the total number of matches.

# File that is scanned when this script is run directly.
INPUT_FILENAME = 'rjmcmc-dependent.txt'

# A leading apostrophe, one [Z0-9] character, then seven more [Z0-9] characters
# each preceded by exactly one space.  No regex flags are required because the
# pattern contains no '^', '$' or '.', so re.M and re.S would change nothing.
MODEL_PATTERN = re.compile(r"'[Z0-9](?: [Z0-9]){7}")


def find_model_strings(text):
    """Return every model string found in text, in order of appearance.

    Each returned string includes the leading apostrophe matched by the
    pattern.  An empty list is returned when nothing matches.
    """
    return MODEL_PATTERN.findall(text)


def count_models(model_list):
    """Return a dict mapping each distinct model string to its count."""
    counts = {}
    for m in model_list:
        # dict.get avoids a separate membership test (and, on Python 2, the
        # temporary key list that "m in counts.keys()" would build each time).
        counts[m] = counts.get(m, 0) + 1
    return counts


def sorted_model_counts(counts):
    """Return (count, model string) tuples sorted from highest to lowest.

    Tuples are compared as a whole, so model strings that share a count are
    ordered by the string itself, also descending.
    """
    return sorted(((v, k) for (k, v) in counts.items()), reverse=True)


def format_report(model_tuples):
    """Return the report as a list of lines.

    There is one right-aligned count/model line per tuple, in the order given,
    followed by a final line holding the sum of all counts.
    """
    lines = ['%12d   %s' % (v, k) for (v, k) in model_tuples]
    total = sum(v for (v, _) in model_tuples)
    lines.append('Total matches: %d' % total)
    return lines


def main(filename=INPUT_FILENAME):
    """Read filename, count its model strings and print the report."""
    # The with-block guarantees the file is closed even if read() raises.
    with open(filename, 'r') as infile:
        stuff = infile.read()

    model_tuples = sorted_model_counts(count_models(find_model_strings(stuff)))

    # A parenthesised single argument prints identically on Python 2 and 3.
    for line in format_report(model_tuples):
        print(line)


if __name__ == '__main__':
    main()