Margie's Tech Blog: July 2014

Sunday, July 27, 2014

python: Write totals at different indentation levels


"""

ReportWithLevels

__author__ = 'Margery Harrison'

__license__ = "Public Domain"

__version__ = "1.0"

"""



import sys



class ReportWithLevels(object):
    """Write labelled totals to a stream, indented according to a level.

    Attributes (all may be reassigned after construction):
    fdout          -- file-like object the report is written to
                      (defaults to sys.stdout)
    number_width   -- field width the total is right-justified in
    level_indent   -- spaces of indentation added per level
    total_width    -- combined width of the indent and message fields
    debug          -- when true, debugPrint() echoes its message to stdout
    min_level      -- lowest level printLevel() accepts
    max_level      -- highest level printLevel() accepts
    newline_before -- levels that get a blank line written before them
    """

    def __init__(self):
        """
        Sets the default values for the class
        """
        self.fdout = sys.stdout  # default - writes report to stdout
        self.number_width = 10
        self.level_indent = 2
        self.total_width = 30
        self.debug = True
        self.min_level = 1
        self.max_level = 4

        # levels before which to print a newline
        self.newline_before = [1]

        # file object opened by open_outfile(), if any; only this one is
        # ever closed by close(), never a stream the caller assigned
        self._opened_file = None

    def open_outfile(self, path):
        """Open path for writing and make it the report destination.

        If the file cannot be opened, a message is written to stderr and
        the current destination is left unchanged.
        """
        try:
            new_stream = open(path, 'w')
        except IOError:
            msg = "{0:s} Can't open and write to {1:s}\n".format(
                self.__class__.__name__, path)
            sys.stderr.write(msg)
            return
        # release a file from an earlier open_outfile() call before
        # switching to the new one
        self.close()
        self._opened_file = new_stream
        self.fdout = new_stream

    def close(self):
        """Close the file opened by open_outfile(), if there is one.

        A stream assigned directly to fdout by the caller is never closed.
        If fdout still points at the closed file it falls back to stdout.
        """
        opened = getattr(self, '_opened_file', None)
        if opened is None:
            return
        opened.close()
        if self.fdout is opened:
            self.fdout = sys.stdout
        self._opened_file = None

    def debugPrint(self, message):
        """Print message to stdout if debugging is turned on."""
        if self.debug:
            print(message)

    def writeLine(self, message):
        """Write message to the output stream with a newline at the end."""
        self.fdout.write(message + '\n')

    def printLevel(self, level, message, total):
        """Write message and total on one line, indented according to level.

        Keyword arguments:
        level   -- value int() accepts, between min_level and max_level
        message -- string label, left-justified after the indentation
        total   -- value int() accepts, right-justified in number_width

        Raises AssertionError if level is outside the configured limits.
        """
        lev = int(level)
        # raised explicitly (not via assert) so the check survives python -O
        if not self.min_level <= lev <= self.max_level:
            raise AssertionError("input level not within current limits")

        # skip a line before the levels listed in newline_before
        if lev in self.newline_before:
            self.writeLine('')

        # widths of the indentation field and of the message field
        indent_width = lev * self.level_indent
        message_width = self.total_width - indent_width

        # build the format string with the field widths filled in,
        # e.g. level 1 with the defaults gives '{0:2s} {1:28s}{2:-10d}'
        fstr = '{{0:{0:d}s}} {{1:{1:d}s}}'.format(indent_width, message_width)

        # the number is right-justified within number_width
        fstr += '{{2:-{0:d}d}}'.format(self.number_width)
        self.debugPrint('level {0:d} format str= {1:s}'.format(lev, fstr))

        self.writeLine(fstr.format(' ', message, int(total)))





if __name__ == '__main__':
    # Demo: write a small report with totals at several levels to testout.txt
    print("Testing ReportWithLevels.printLevel()")
    pl = ReportWithLevels()
    pl.open_outfile("testout.txt")
    try:
        pl.writeLine("This is my report")
        pl.number_width = 12
        tot = 7
        for level in [1, 2, 3, 2, 2, 3, 4, 3, 1]:
            msg = 'level {0:d} msg'.format(level)
            tot = tot * 12
            pl.printLevel(level, msg, tot)

        # pl.printLevel(8, "level 8 msg", 88)  # uncomment to test the level check
    finally:
        # open_outfile() leaves fdout as stdout when the open fails, so only
        # close a real output file
        if pl.fdout is not sys.stdout:
            pl.fdout.close()

assert isinstance() not for file stream parameters?

I'm writing methods in PyCharm, and I'd like to follow its hints for the way I should be structuring my code. Here I'm passing in a file-stream parameter, which could be sys.stdout or an open file object. It prompted me to include an "assert isinstance()" for the input parameter.

def set_stream(self,fdout):
"""
:param fdout:
"""
assert isinstance(fdout,...)

I was looking up what type to call it for the purposes of isinstance() and I came across http://dobesland.wordpress.com/2007/10/07/python-isinstance-considered-useful/:

The classic example of this is python’s famous “file-like objects”, which typically implement read and/or write in the same way and are accepted by various python functions. I believe the DB API is another well-used example of this. In both cases, there is no common base-class, so it’s impossible to use isinstance() to check whether a particular object is, in fact, file-like or a database object.

I guess I'll leave that assert() out. The code will break reliably enough when someone tries to write to an fdout that isn't the right sort of object.

Sunday, July 20, 2014

python: CSV analysis using Counter and DictReader and format

Here is a simple CSV file:

name,color,size,shape,number
tom,red,big,square,3
mary,blue,big,triangle,5
sally,green,small,square,2
edith,blue,small,triangle,1
wally,red,big,square,7
jon,blue,small,triangle,3

This code reads in the simple CSV and reports on it:

import os.path
import csv

import collections


def printLevel(level, message, total):
    """ Print message and totals, with spacing determined by level

    Keyword arguments:
    level   -- integer from 1 to 4 (not enforced here); level 1 lines are
               preceded by a blank line
    message -- string, e.g. "Number of happy tomcats"
    total   -- for this version, should be an integer count (or a string
               that int() accepts)
    """
    lev = int(level)
    if lev == 1:
        print("")

    # Five columns of indentation per level; indent plus message field always
    # add up to 40, so the totals line up in one column. For example:
    #   level 1 -> '{0:5s} {1:35s} {2:-3d}'
    #   level 2 -> '{0:10s} {1:30s} {2:-3d}'
    indent_width = 5 * lev
    message_width = 40 - indent_width
    fstr = '{{0:{0:d}s}} {{1:{1:d}s}}'.format(indent_width, message_width)
    fstr += ' {2:-3d}'

    print(fstr.format(' ', message, int(total)))



def print_colors_shapes(c):
    """Report the count of each shape (broken down by color) and each color.

    Keyword argument:
    c -- mapping with a .get() method holding counts keyed by shape name,
         by color name, and by 'color_shape'; missing keys count as zero
    """
    shapes = ['square', 'triangle', 'circle']
    colors = ['red', 'blue', 'green', 'yellow']

    # one level-1 line per shape, followed by its per-color breakdown
    for shape_name in shapes:
        shape_total = c.get(shape_name, "0")
        printLevel(1, 'Number of ' + shape_name + 's', shape_total)

        # nothing to break down when the shape never appeared
        if int(shape_total) <= 0:
            continue

        for color_name in colors:
            label = "Number of " + color_name + " " + shape_name + 's'
            combo_total = c.get(color_name + '_' + shape_name, "0")
            printLevel(2, label, combo_total)

    # then one level-1 line per color across all shapes
    for color_name in colors:
        color_total = c.get(color_name, '0')
        printLevel(1, 'Total {0} shapes'.format(color_name), color_total)



def count_color_shape(my_reader):
    """ create counters for colors and shapes separate and combined

    Keyword argument:
    my_reader -- iterable of rows that can be indexed by 'color' and
                 'shape', e.g. a csv.DictReader

    Returns a collections.Counter keyed by each color, each shape, and each
    'color_shape' combination. Every row is printed as it is read.
    """
    c = collections.Counter()
    for row in my_reader:
        print(row)
        color = row['color']
        shape = row['shape']
        # one tally each for the color, the shape, and the combination
        c.update([color, shape, color + '_' + shape])
    return c


def read_dict(path):
    """Read the CSV file at path, print a color/shape report, return counts.

    Keyword argument:
    path -- path of a CSV file whose header row includes 'color' and 'shape'

    Prints the field names, the Counter built by count_color_shape(), and
    the report from print_colors_shapes(). Returns that Counter.
    """
    with open(path) as csv_file:
        my_reader = csv.DictReader(csv_file)
        print(my_reader.fieldnames)
        # the rows must be consumed while the file is still open
        c = count_color_shape(my_reader)

    print(c)
    print_colors_shapes(c)
    # hand the Counter back so callers can use the counts
    return c


def test_dict():
    """Run read_dict() on the sample CSV file, printing its path first."""
    datadir = "/Users/margery/Documents/pystuff/pyGotham/demo/data"
    csv_file = 'simpleCSV.txt'
    path = os.path.join(datadir, csv_file)
    print(path)
    read_dict(path)


test_dict()

Here is the output:


/usr/bin/python /Users/margery/PycharmProjects/proj3/TestDict1.py

/Users/margery/Documents/pystuff/pyGotham/demo/data/simpleCSV.txt

['name', 'color', 'size', 'shape', 'number']

{'color': 'red', 'shape': 'square', 'number': '3', 'name': 'tom', 'size': 'big'}

{'color': 'blue', 'shape': 'triangle', 'number': '5', 'name': 'mary', 'size': 'big'}

{'color': 'green', 'shape': 'square', 'number': '2', 'name': 'sally', 'size': 'small'}

{'color': 'blue', 'shape': 'triangle', 'number': '1', 'name': 'edith', 'size': 'small'}

{'color': 'red', 'shape': 'square', 'number': '7', 'name': 'wally', 'size': 'big'}

{'color': 'blue', 'shape': 'triangle', 'number': '3', 'name': 'jon', 'size': 'small'}

Counter({'blue': 3, 'square': 3, 'triangle': 3, 'blue_triangle': 3, 'red_square': 2, 'red': 2, 'green': 1, 'green_square': 1})



      Number of squares                     3

           Number of red squares            2

           Number of blue squares           0

           Number of green squares          1

           Number of yellow squares         0



      Number of triangles                   3

           Number of red triangles          0

           Number of blue triangles         3

           Number of green triangles        0

           Number of yellow triangles       0



      Number of circles                     0



      Total red shapes                      2



      Total blue shapes                     3



      Total green shapes                    1



      Total yellow shapes                   0

Saturday, July 19, 2014

python: csv.dictreader - That's what I should have been using

I learned late Wednesday that I'm supposed to give a talk at PyGotham about my forays into Python dictionary land. So I did some research, and what I really should have been using is the csv module's DictReader class. It does exist in Python 2.6, so being stuck on that version is no excuse.

There's a good explanation at http://pymotw.com/2/csv/#using-field-names

And great examples at:

http://www.youlikeprogramming.com/2013/11/python-csv-reader-dictreader-quick-reference/

Margie's Tech Blog