"""Search a text file for model strings and report how often each occurs.

A model string is an apostrophe followed by eight space-separated
characters, each of which is either ``Z`` or a decimal digit, e.g.::

    '0 1 Z 0 0 1 1 0

Running this file as a script reads ``rjmcmc-dependent.txt`` from the
current directory and prints one line per distinct model string (count
first, most frequent first), followed by the total number of matches.
"""
import re
from collections import Counter

# File that is scanned when the script is run directly.
INPUT_PATH = 'rjmcmc-dependent.txt'

# An apostrophe, one [Z0-9] character, then seven more, each preceded by a
# single space.  The pattern contains no '^', '$' or '.', so the MULTILINE
# and DOTALL flags would have no effect and are not passed.
MODEL_PATTERN = re.compile(r"'[Z0-9](?: [Z0-9]){7}")


def count_models(text):
    """Count every distinct model string found in *text*.

    Args:
        text: The string to search; it may span many lines.

    Returns:
        A list of ``(count, model_string)`` tuples sorted from highest to
        lowest.  Entries with equal counts are ordered by model string,
        also descending, because the tuples are compared as a whole.
    """
    counts = Counter(MODEL_PATTERN.findall(text))
    # Put the count first so that plain tuple ordering sorts by count.
    return sorted(((count, model) for model, count in counts.items()),
                  reverse=True)


def format_report(model_tuples):
    """Build the report lines for the output of :func:`count_models`.

    Args:
        model_tuples: An iterable-safe sequence of ``(count, model_string)``
            tuples, already in the order they should be listed.

    Returns:
        A list of strings: one ``'%12d %s'`` line per tuple, then a final
        ``'Total matches: N'`` line where N is the sum of all counts.
    """
    lines = ['%12d %s' % (count, model) for count, model in model_tuples]
    total = sum(count for count, _ in model_tuples)
    lines.append('Total matches: %d' % total)
    return lines


def main(path=INPUT_PATH):
    """Read *path*, count its model strings and print the report.

    Args:
        path: Name of the file to scan; defaults to ``INPUT_PATH``.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if read() raises.
    with open(path, 'r') as handle:
        text = handle.read()
    for line in format_report(count_models(text)):
        # A single parenthesised argument prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print(line)


if __name__ == '__main__':
    main()