In [1]:
import glob, re
import csv

catalog = {}

f = open('selected_muncie_titles_102218.csv', 'r', encoding='utf-8')

r = csv.reader(f)
for row in r:
    
    try:
        author = row[0]
        title = row[1]
        accession_nbr = row[3]
        transactions = int(row[4])
        file_name = row[10]
        
        if (author, title) not in catalog:
            catalog[(author, title)] = {'accession_numbers': [], 'dates': []}

        catalog[(author, title)]['accession_numbers'].append(accession_nbr)
    
    except ValueError:
        pass
    
f.close()

#print(len(catalog))
In [2]:
import pymysql

connection = pymysql.connect(host='localhost',
                             user='root',
                             password='p5a1m99',
                             db='middletown1',
                             charset='utf8',
                             cursorclass=pymysql.cursors.DictCursor)

for k, v in catalog.items():
    
    for accession_number in v['accession_numbers']:

        with connection.cursor() as cursor:

            sql = "SELECT transaction_date FROM flattenedData " + \
                    "WHERE accessionNbr=%s;"
            
            cursor.execute(sql, (accession_number,))
            
            results = cursor.fetchall()
            
            for r in results:
                catalog[k]['dates'].append(str(r['transaction_date']))

#print(len(catalog))
#print(list(catalog.keys())[0])
In [3]:
for k in catalog.keys():
    catalog[k]['date_counts'] = {}
    for d in catalog[k]['dates']:
        short_d = '-'.join(d.split('-')[:2])
        if short_d not in catalog[k]['date_counts']:
            catalog[k]['date_counts'][short_d] = 0
        catalog[k]['date_counts'][short_d] += 1
In [4]:
#print(catalog[('Stowe, Harriet Beecher', 'Uncle Toms Cabin')])
#print(len(catalog[('Stowe, Harriet Beecher', 'Uncle Toms Cabin')]['dates']))
n = 0
for k, v in catalog[('Stowe, Harriet Beecher', 'Uncle Toms Cabin')]['date_counts'].items():
    n += v
#print(n)
In [5]:
low_date = '9999-99'
high_date = '0000-00'
all_dates = []

for k, v in catalog.items():
    for dk, dv in v['date_counts'].items():
        all_dates.append(dk)
        if  dk < low_date:
            low_date = dk
        if  dk > high_date:
            high_date = dk
            
all_dates = sorted(list(set(all_dates)))

gap_dates = []

for y in range(1891, 1903):
    for m in range(1, 13):
        short_d = str(y) + '-' + str(m).zfill(2)
        if short_d <= '1891-10':
            pass
        elif short_d not in all_dates:
            gap_dates.append(short_d)

template_date_hash = {}
for d in all_dates + gap_dates:
    template_date_hash[d] = 0
            
#print(low_date, high_date)
#print()
#print(all_dates)
#print()
#print(gap_dates)
#print()
#print(template_date_hash)
In [6]:
import copy

all_x_and_y = []
    
everything_date_hash = copy.deepcopy(template_date_hash)

for author_title in sorted(list(catalog.keys())):
    
    date_hash = copy.deepcopy(template_date_hash)
    
    for d, n in catalog[author_title]['date_counts'].items():
        everything_date_hash[d] += n
        date_hash[d] += n
    
    x = []
    y = []
    
    for d in sorted(date_hash.keys()):
        x.append(d)
        y.append(date_hash[d])
        
    all_x_and_y.append(['. '.join(author_title) + '.', x, y, catalog[author_title]['accession_numbers']])
    
high_y = -1

for row in all_x_and_y:
    for y in row[2]:
        if y > high_y:
            high_y = y
            
print(high_y)
20
In [7]:
import pymysql

def get_accession_dates(accession_numbers):
    
    accession_dates = []

    connection = pymysql.connect(host='localhost',
                                 user='root',
                                 password='p5a1m99',
                                 db='middletown1',
                                 charset='utf8',
                                 cursorclass=pymysql.cursors.DictCursor)

    for accession_number in accession_numbers:

        with connection.cursor() as cursor:

            sql = "SELECT ACC_DATE_STAND FROM books " + \
                    "WHERE ACCESSION_NUMBER=%s;"

            cursor.execute(sql, (accession_number,))

            results = cursor.fetchall()

            for r in results:
                accession_dates.append(str(r['ACC_DATE_STAND']))
                
    return ' '.join(accession_dates)
In [8]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

everything_high_y = -1

x = []
y = []

for d in sorted(everything_date_hash.keys()):
    x.append(d)
    y.append(everything_date_hash[d])
    if everything_date_hash[d] > everything_high_y:
        everything_high_y = everything_date_hash[d]

x_ticks = []
x_ticks_locs = []
for d in sorted(all_dates + gap_dates):
    if d.endswith('-01'):
        x_ticks.append(d.split('-')[0])
        x_ticks_locs.append(d)

plt.rcParams["figure.figsize"] = (20, 5)

plt.ylim(0, everything_high_y) 

plt.bar(x, y)
plt.xticks(x_ticks_locs, x_ticks)
plt.xlabel('date')
plt.ylabel('n checkouts')
plt.title('Everything')
plt.show()
In [9]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

x_ticks = []
x_ticks_locs = []
for d in sorted(all_dates + gap_dates):
    if d.endswith('-01'):
        x_ticks.append(d.split('-')[0])
        x_ticks_locs.append(d)

plt.rcParams["figure.figsize"] = (20, 5)

for row in all_x_and_y:

    plt.ylim(0, high_y) 
    
    plt.bar(row[1], row[2])
    plt.xticks(x_ticks_locs, x_ticks)
    plt.xlabel('date')
    plt.ylabel('n checkouts')
    plt.title(row[0] + ' ACC: ' + get_accession_dates(row[3]))
    plt.show()
In [ ]: