Mostly Linux & Python syntax notes and hyperlinks.

Sunday, July 27, 2014

python: Write totals at different indentation levels

"""
ReportWithLevels
__author__ = 'Margery Harrison'
__license__ = "Public Domain"
__version__ = "1.0"
"""

import sys

class ReportWithLevels(object):
    """Write report lines of the form <indent> <message> <total>.

    The indent grows with the level passed to printLevel(), while the
    message column shrinks by the same amount so that totals line up in a
    right-justified column.  Output goes to ``fdout`` (sys.stdout unless
    open_outfile() succeeds or the attribute is reassigned).
    """

    def __init__(self):
        """
        Sets the default values for the class
        """
        self.fdout = sys.stdout  # default - writes report to stdout
        self.number_width = 10   # width of the right-justified total
        self.level_indent = 2    # spaces of indent added per level
        self.total_width = 30    # indent width + message width
        self.debug = True        # when True, debugPrint() prints
        self.min_level = 1
        self.max_level = 4

        # levels before which to print a newline
        self.newline_before = [1]

        # file object opened by open_outfile(), so it can be closed later
        self._opened_file = None

    def open_outfile(self, path):
        """Open *path* for writing and make it the report destination.

        If the file cannot be opened, a message is written to stderr and
        the current destination is left unchanged.  A file opened by an
        earlier call is closed once the new one is open.
        """
        try:
            new_out = open(path, 'w')
        except IOError:
            msg = "{0:s} Can't open and write to {1:s}\n".format(
                self.__class__.__name__, path)
            sys.stderr.write(msg)
            return
        self.close_outfile()
        self.fdout = new_out
        self._opened_file = new_out

    def close_outfile(self):
        """Close the file opened by open_outfile(), if there is one.

        If that file is still the current destination, fdout reverts to
        sys.stdout.  Streams assigned to fdout directly are never closed.
        """
        opened = self._opened_file
        if opened is None:
            return
        opened.close()
        if self.fdout is opened:
            self.fdout = sys.stdout
        self._opened_file = None

    def debugPrint(self, message):
        """Print *message* if debugging is turned on."""
        if self.debug:
            print(message)

    def writeLine(self, message):
        """Write *message* to the output stream with a newline at the end."""
        self.fdout.write(message + '\n')

    def printLevel(self, level, message, total):
        """Write *message* and *total*, indented according to *level*.

        level   -- int (or int-convertible) within min_level..max_level
        message -- string placed after the indent
        total   -- int (or int-convertible), right-justified in number_width

        Raises AssertionError when the level is outside the current limits.
        """
        lev = int(level)
        assert self.min_level <= lev <= self.max_level,\
            "input level not within current limits"

        # skip a line before the levels listed in newline_before
        if lev in self.newline_before:
            self.writeLine('')

        # s1 and s2 are the widths of the indent and message columns
        s1 = lev * self.level_indent
        s2 = self.total_width - s1

        # Build the format string in two passes: the doubled braces survive
        # this first format() call as the fields filled in further down.
        fstr = '{{0:{0:d}s}} {{1:{1:d}s}}'.format(s1, s2)

        # Number format string is right justified within number_width
        fstr += '{{2:-{0:d}d}}'.format(self.number_width)
        self.debugPrint('level {0:d} format str= {1:s}'.format(lev, fstr))

        self.writeLine(fstr.format(' ', message, int(total)))


if __name__ == '__main__':
    # Smoke test: write a short report with mixed levels to testout.txt.
    # print() with a single argument behaves the same on Python 2 and 3.
    print("Testing ReportWithLevels.printLevel()")
    pl = ReportWithLevels()
    pl.open_outfile("testout.txt")
    try:
        pl.writeLine("This is my report")
        pl.number_width = 12
        tot = 7
        for level in [1, 2, 3, 2, 2, 3, 4, 3, 1]:  #range(1,4):
            msg = 'level {0:d} msg'.format(level)
            tot = tot * 12
            pl.printLevel(level, msg, tot)

        #pl.printLevel(8,"level 8 msg",88)  #test assert error
    finally:
        # fdout is still sys.stdout if open_outfile() could not open the
        # file; only close a real output file.
        if pl.fdout is not sys.stdout:
            pl.fdout.close()

assert isinstance() not for file stream parameters?

I'm writing methods in PyCharm, and I'd like to follow its hints for the way I should be structuring my code. Here I'm passing in a parameter that is a file stream: it could be sys.stdout or an open file object. PyCharm prompted me to include an "assert isinstance()" for the input parameter.

    def set_stream(self,fdout):
        """
        :param fdout: file stream to write to, e.g. sys.stdout or an open file
        """ 
        # The second isinstance() argument is an unfilled placeholder: this
        # post is asking which type, if any, describes a file-like object.
        assert isinstance(fdout,...) 

I was looking up what type to call it for the purposes of isinstance() and I came across  http://dobesland.wordpress.com/2007/10/07/python-isinstance-considered-useful/:
The classic example of this is python’s famous “file-like objects”, which typically implement read and/or write in the same way and are accepted by various python functions.  I believe the DB API is another well-used example of this.  In both cases, there is no common base-class, so it’s impossible to use isinstance() to check whether a particular object is, in fact, file-like or a database object.
I guess I'll leave that assert() out. The code will break reliably enough when someone tries to write to an fdout that isn't the right sort of object.

Sunday, July 20, 2014

python: CSV analysis using Counter and DictReader and format

Here is a simple CSV file:

name,color,size,shape,number
tom,red,big,square,3
mary,blue,big,triangle,5
sally,green,small,square,2
edith,blue,small,triangle,1
wally,red,big,square,7
jon,blue,small,triangle,3

This code reads in the simple CSV and reports on it:


import os.path 
import csv 
import collections


def printLevel(level, message, total):
    """Print message and totals, with spacing determined by level.

    Keyword arguments:
    level   -- integer from 1 to 4
    message -- string, e.g. "Number of happy tomcats"
    total   -- for this version, should be an integer count
               (anything int() accepts, such as "0", also works)
    """
    lev = int(level)

    # blank line before every top-level entry
    if lev == 1:
        print("")

    # The indent grows by 5 per level while indent + message width stays 40:
    #   level 1 -> '{0:5s} {1:35s} {2:-3d}'
    #   level 2 -> '{0:10s} {1:30s} {2:-3d}'
    s1 = 5 * lev
    s2 = 40 - s1
    # Doubled braces survive this first format() call and become the fields
    # that are filled in by the second format() call below.
    fstr = '{{0:{0:d}s}} {{1:{1:d}s}}'.format(s1, s2)
    fstr += ' {2:-3d}'

    print(fstr.format(' ', message, int(total)))

def print_colors_shapes(c):
    """Print a report on the number of shapes of different colors.

    Keyword argument:
    c -- collection that includes values in shape_list, color_list below;
         it is read with c.get() using shape, color and 'color_shape' keys
    """
    shape_list = ['square', 'triangle', 'circle']
    color_list = ['red', 'blue', 'green', 'yellow']

    # first print shapes, each followed by its per-color breakdown
    for shape in shape_list:
        msg = 'Number of ' + shape + 's'
        tot = c.get(shape, 0)
        printLevel(1, msg, tot)
        # skip the breakdown for shapes that never occur
        if int(tot) > 0:
            for color in color_list:
                msg = "Number of " + color + " " + shape + 's'
                tot = c.get(color + '_' + shape, 0)
                printLevel(2, msg, tot)

    # then the per-color totals across all shapes
    for color in color_list:
        msg = 'Total {0} shapes'.format(color)
        tot = c.get(color, 0)
        printLevel(1, msg, tot)


def count_color_shape(my_reader):
    """Create counters for colors and shapes, separate and combined.

    Keyword argument:
    my_reader -- of type csv.DictReader; any iterable of rows that have
                 'color' and 'shape' keys works

    Returns a collections.Counter keyed by each color, each shape and each
    'color_shape' combination.  Every row is printed as it is read.
    """
    c = collections.Counter()
    for row in my_reader:
        print(row)
        color = row['color']
        shape = row['shape']
        # one count each for the color, the shape and the combination
        c.update([color, shape, color + '_' + shape])
    return c



def read_dict(path):
    """Read the CSV file at *path*, count its colors and shapes, and report.

    Prints the field names, the resulting Counter and the formatted report,
    then returns the Counter so that callers can reuse it.
    """
    with open(path) as csv_file:
        my_reader = csv.DictReader(csv_file)
        print(my_reader.fieldnames)
        c = count_color_shape(my_reader)
    # The file is only needed while rows are being read; report afterwards.
    print(c)
    print_colors_shapes(c)
    return c

def test_dict():
    """Run read_dict() on the demo CSV file.

    The data directory is hard-coded, so this only works on a machine where
    that path exists.
    """
    datadir = "/Users/margery/Documents/pystuff/pyGotham/demo/data"
    csv_file = 'simpleCSV.txt'
    path = os.path.join(datadir, csv_file)
    print(path)
    read_dict(path)

test_dict()
 
Here is the output:
/usr/bin/python /Users/margery/PycharmProjects/proj3/TestDict1.py
/Users/margery/Documents/pystuff/pyGotham/demo/data/simpleCSV.txt
['name', 'color', 'size', 'shape', 'number']
{'color': 'red', 'shape': 'square', 'number': '3', 'name': 'tom', 'size': 'big'}
{'color': 'blue', 'shape': 'triangle', 'number': '5', 'name': 'mary', 'size': 'big'}
{'color': 'green', 'shape': 'square', 'number': '2', 'name': 'sally', 'size': 'small'}
{'color': 'blue', 'shape': 'triangle', 'number': '1', 'name': 'edith', 'size': 'small'}
{'color': 'red', 'shape': 'square', 'number': '7', 'name': 'wally', 'size': 'big'}
{'color': 'blue', 'shape': 'triangle', 'number': '3', 'name': 'jon', 'size': 'small'}
Counter({'blue': 3, 'square': 3, 'triangle': 3, 'blue_triangle': 3, 'red_square': 2, 'red': 2, 'green': 1, 'green_square': 1})

      Number of squares                     3
           Number of red squares            2
           Number of blue squares           0
           Number of green squares          1
           Number of yellow squares         0

      Number of triangles                   3
           Number of red triangles          0
           Number of blue triangles         3
           Number of green triangles        0
           Number of yellow triangles       0

      Number of circles                     0

      Total red shapes                      2

      Total blue shapes                     3

      Total green shapes                    1

      Total yellow shapes                   0


Saturday, July 19, 2014

python: csv.dictreader - That's what I should have been using

I learned late Wednesday that I'm supposed to give a talk at PyGotham about my forays into Python dictionary land. So I did some research. And what I really should have been using is a csv reader flavor called DictReader (csv.DictReader). This does exist in Python 2.6, so being stuck there is no excuse.

There's a good explanation at http://pymotw.com/2/csv/#using-field-names