Mostly Linux & Python syntax notes and hyperlinks.

Sunday, July 20, 2014

Python: CSV analysis using Counter, DictReader, and str.format

Here is a simple CSV file:

name,color,size,shape,number
tom,red,big,square,3
mary,blue,big,triangle,5
sally,green,small,square,2
edith,blue,small,triangle,1
wally,red,big,square,7
jon,blue,small,triangle,3

This code reads in the simple CSV and reports on it:


import os.path 
import csv 
import collections


def printLevel(level, message, total):
    """Print a message and its total, indented according to level.

    Keyword arguments:
    level   -- integer from 1 to 4; a level-1 line is preceded by a blank line
    message -- string, e.g. "Number of happy tomcats"
    total   -- for this version, should be an integer count
               (it is passed through int(), so "0" is accepted as well)
    """
    lev = int(level)

    # Top-level entries are set off from the previous group by a blank line.
    if lev == 1:
        print("")

    # The indent grows by 5 columns per level while the message column
    # shrinks by the same amount, so the totals line up in a single column.
    indent_width = 5 * lev  # 5 or 10 for levels 1 and 2
    message_width = 40 - indent_width
    fstr = '{{0:{0:d}s}} {{1:{1:d}s}} {{2:-3d}}'.format(indent_width,
                                                       message_width)

    # A single-argument print(...) is valid on both Python 2 and Python 3.
    print(fstr.format(' ', message, int(total)))

def print_colors_shapes(c):
    """Print a report on the number of shapes of different colors.

    Keyword argument:
    c -- mapping of counts (e.g. a collections.Counter) keyed by shape name,
         color name, and "<color>_<shape>"; keys that are missing are
         reported as 0
    """
    shape_list = ['square', 'triangle', 'circle']
    color_list = ['red', 'blue', 'green', 'yellow']

    # First print each shape total; break it down by color only when the
    # shape actually occurs, so empty shapes take a single line.
    for shape in shape_list:
        tot = c.get(shape, 0)
        printLevel(1, 'Number of ' + shape + 's', tot)
        if int(tot) > 0:
            for color in color_list:
                msg = 'Number of ' + color + ' ' + shape + 's'
                printLevel(2, msg, c.get(color + '_' + shape, 0))

    # Then print the per-color totals across all shapes.
    for color in color_list:
        printLevel(1, 'Total {0} shapes'.format(color), c.get(color, 0))


def count_color_shape(my_reader):
    """Create counters for colors and shapes, separately and combined.

    Keyword argument:
    my_reader -- of type csv.DictReader; any iterable of rows that have
                 'color' and 'shape' keys is handled the same way

    Returns a collections.Counter keyed by each color, each shape, and each
    "<color>_<shape>" combination. Every row is echoed to stdout as it is
    read. Raises KeyError if a row lacks a 'color' or 'shape' key.
    """
    c = collections.Counter()
    for row in my_reader:
        # A single-argument print(...) is valid on both Python 2 and 3.
        print(row)
        color = row['color']
        shape = row['shape']
        # One update call bumps the color, the shape, and the combined key.
        c.update([color, shape, color + '_' + shape])
    return c



def read_dict(path):
    """Read the CSV at path, print a color/shape report, return the counts.

    Keyword argument:
    path -- path to a CSV file with a header row that includes 'color'
            and 'shape' columns

    Prints the field names, the raw Counter, and the formatted report, then
    returns the Counter built by count_color_shape().
    """
    # NOTE: the Python 3 csv docs recommend open(path, newline=''); the plain
    # open(path) is kept here so the script still runs on Python 2.
    with open(path) as csv_file:
        my_reader = csv.DictReader(csv_file)
        print(my_reader.fieldnames)
        c = count_color_shape(my_reader)
        print(c)
        print_colors_shapes(c)
    # Hand the Counter back so callers can do further analysis on it.
    return c

def test_dict(datadir="/Users/margery/Documents/pystuff/pyGotham/demo/data",
              csv_file='simpleCSV.txt'):
    """Run the CSV report on a sample file and print the path used.

    Keyword arguments:
    datadir  -- directory holding the CSV file (defaults to the demo data
                directory this script was written against)
    csv_file -- name of the CSV file inside datadir
    """
    path = os.path.join(datadir, csv_file)
    # A single-argument print(...) is valid on both Python 2 and Python 3.
    print(path)
    read_dict(path)

test_dict()
 
Here is the output:
/usr/bin/python /Users/margery/PycharmProjects/proj3/TestDict1.py
/Users/margery/Documents/pystuff/pyGotham/demo/data/simpleCSV.txt
['name', 'color', 'size', 'shape', 'number']
{'color': 'red', 'shape': 'square', 'number': '3', 'name': 'tom', 'size': 'big'}
{'color': 'blue', 'shape': 'triangle', 'number': '5', 'name': 'mary', 'size': 'big'}
{'color': 'green', 'shape': 'square', 'number': '2', 'name': 'sally', 'size': 'small'}
{'color': 'blue', 'shape': 'triangle', 'number': '1', 'name': 'edith', 'size': 'small'}
{'color': 'red', 'shape': 'square', 'number': '7', 'name': 'wally', 'size': 'big'}
{'color': 'blue', 'shape': 'triangle', 'number': '3', 'name': 'jon', 'size': 'small'}
Counter({'blue': 3, 'square': 3, 'triangle': 3, 'blue_triangle': 3, 'red_square': 2, 'red': 2, 'green': 1, 'green_square': 1})

      Number of squares                     3
           Number of red squares            2
           Number of blue squares           0
           Number of green squares          1
           Number of yellow squares         0

      Number of triangles                   3
           Number of red triangles          0
           Number of blue triangles         3
           Number of green triangles        0
           Number of yellow triangles       0

      Number of circles                     0

      Total red shapes                      2

      Total blue shapes                     3

      Total green shapes                    1

      Total yellow shapes                   0


No comments:

Post a Comment