import glob, re
import csv
catalog = {}
f = open('selected_muncie_titles_102218.csv', 'r', encoding='utf-8')
r = csv.reader(f)
for row in r:
try:
author = row[0]
title = row[1]
accession_nbr = row[3]
transactions = int(row[4])
file_name = row[10]
if (author, title) not in catalog:
catalog[(author, title)] = {'accession_numbers': [], 'dates': []}
catalog[(author, title)]['accession_numbers'].append(accession_nbr)
except ValueError:
pass
f.close()
#print(len(catalog))
import pymysql
connection = pymysql.connect(host='localhost',
user='root',
password='p5a1m99',
db='middletown1',
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
for k, v in catalog.items():
for accession_number in v['accession_numbers']:
with connection.cursor() as cursor:
sql = "SELECT transaction_date FROM flattenedData " + \
"WHERE accessionNbr=%s;"
cursor.execute(sql, (accession_number,))
results = cursor.fetchall()
for r in results:
catalog[k]['dates'].append(str(r['transaction_date']))
#print(len(catalog))
#print(list(catalog.keys())[0])
for k in catalog.keys():
catalog[k]['date_counts'] = {}
for d in catalog[k]['dates']:
short_d = '-'.join(d.split('-')[:2])
if short_d not in catalog[k]['date_counts']:
catalog[k]['date_counts'][short_d] = 0
catalog[k]['date_counts'][short_d] += 1
#print(catalog[('Stowe, Harriet Beecher', 'Uncle Toms Cabin')])
#print(len(catalog[('Stowe, Harriet Beecher', 'Uncle Toms Cabin')]['dates']))
n = 0
for k, v in catalog[('Stowe, Harriet Beecher', 'Uncle Toms Cabin')]['date_counts'].items():
n += v
#print(n)
low_date = '9999-99'
high_date = '0000-00'
all_dates = []
for k, v in catalog.items():
for dk, dv in v['date_counts'].items():
all_dates.append(dk)
if dk < low_date:
low_date = dk
if dk > high_date:
high_date = dk
all_dates = sorted(list(set(all_dates)))
gap_dates = []
for y in range(1891, 1903):
for m in range(1, 13):
short_d = str(y) + '-' + str(m).zfill(2)
if short_d <= '1891-10':
pass
elif short_d not in all_dates:
gap_dates.append(short_d)
template_date_hash = {}
for d in all_dates + gap_dates:
template_date_hash[d] = 0
#print(low_date, high_date)
#print()
#print(all_dates)
#print()
#print(gap_dates)
#print()
#print(template_date_hash)
import copy
all_x_and_y = []
everything_date_hash = copy.deepcopy(template_date_hash)
for author_title in sorted(list(catalog.keys())):
date_hash = copy.deepcopy(template_date_hash)
for d, n in catalog[author_title]['date_counts'].items():
everything_date_hash[d] += n
date_hash[d] += n
x = []
y = []
for d in sorted(date_hash.keys()):
x.append(d)
y.append(date_hash[d])
all_x_and_y.append(['. '.join(author_title) + '.', x, y, catalog[author_title]['accession_numbers']])
high_y = -1
for row in all_x_and_y:
for y in row[2]:
if y > high_y:
high_y = y
print(high_y)
import pymysql
def get_accession_dates(accession_numbers):
accession_dates = []
connection = pymysql.connect(host='localhost',
user='root',
password='p5a1m99',
db='middletown1',
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
for accession_number in accession_numbers:
with connection.cursor() as cursor:
sql = "SELECT ACC_DATE_STAND FROM books " + \
"WHERE ACCESSION_NUMBER=%s;"
cursor.execute(sql, (accession_number,))
results = cursor.fetchall()
for r in results:
accession_dates.append(str(r['ACC_DATE_STAND']))
return ' '.join(accession_dates)
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
everything_high_y = -1
x = []
y = []
for d in sorted(everything_date_hash.keys()):
x.append(d)
y.append(everything_date_hash[d])
if everything_date_hash[d] > everything_high_y:
everything_high_y = everything_date_hash[d]
x_ticks = []
x_ticks_locs = []
for d in sorted(all_dates + gap_dates):
if d.endswith('-01'):
x_ticks.append(d.split('-')[0])
x_ticks_locs.append(d)
plt.rcParams["figure.figsize"] = (20, 5)
plt.ylim(0, everything_high_y)
plt.bar(x, y)
plt.xticks(x_ticks_locs, x_ticks)
plt.xlabel('date')
plt.ylabel('n checkouts')
plt.title('Everything')
plt.show()
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
x_ticks = []
x_ticks_locs = []
for d in sorted(all_dates + gap_dates):
if d.endswith('-01'):
x_ticks.append(d.split('-')[0])
x_ticks_locs.append(d)
plt.rcParams["figure.figsize"] = (20, 5)
for row in all_x_and_y:
plt.ylim(0, high_y)
plt.bar(row[1], row[2])
plt.xticks(x_ticks_locs, x_ticks)
plt.xlabel('date')
plt.ylabel('n checkouts')
plt.title(row[0] + ' ACC: ' + get_accession_dates(row[3]))
plt.show()