FROM_DATE = '1800-01-01'
TO_DATE = '2000-12-31'
import glob, re
import csv
catalog = []
checkouts = {}
f = open('selected_muncie_titles_102218.csv', 'r', encoding='utf-8')
r = csv.reader(f)
for row in r:
try:
author = row[0]
title = row[1]
accession_nbr = row[3]
transactions = int(row[4])
file_name = row[10]
catalog.append([author, title, accession_nbr, transactions, file_name])
if (author + '. ' + title + '.') not in checkouts:
checkouts[author + '. ' + title + '.'] = 0
checkouts[author + '. ' + title + '.'] += transactions
except ValueError:
pass
f.close()
#print(len(catalog))
#print(catalog[:3])
#print(checkouts.keys())
import pymysql
import statistics
connection = pymysql.connect(host='localhost',
user='root',
password='p5a1m99',
db='middletown1',
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
who_read_what = []
for i, entry in enumerate(catalog):
with connection.cursor() as cursor:
sql = "SELECT count(*) FROM flattenedData " + \
"WHERE accessionNbr=%s " + \
"AND TRUST_THIS_CENSUS = 1 " + \
'AND transaction_date >= \'' + FROM_DATE + '\' and transaction_date <= \'' + TO_DATE + '\' ;'
cursor.execute(sql, (entry[2],))
results = cursor.fetchall()
for r in results:
catalog[i][3] = r['count(*)']
with connection.cursor() as cursor:
sql = "SELECT cleanedPatronName, RACE, GENDER, BIRTH_YEAR, " + \
"BIRTHPLACE, FATHER_BIRTHPLACE, MOTHER_BIRTHPLACE, " + \
"MARITAL_STATUS, YEARS_MARRIED, RESIDENTIAL_STATUS, " + \
"NUM_BOARDERS, NUM_SERVANTS, YEAR_OF_IMMIGRATION, NATURALIZATION_STATUS, " + \
"PROFESSION_TITLE, PROFESSION_CLASS, occupation_group, occupation_rank, " + \
"count(*) " + \
"FROM flattenedData " + \
"WHERE accessionNbr=%s " + \
"AND TRUST_THIS_CENSUS = 1 " + \
"AND transaction_date >= '" + FROM_DATE + "' and transaction_date <= '" + TO_DATE + "' " + \
'GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18;'
cursor.execute(sql, (entry[2],))
results = cursor.fetchall()
for r in results:
who_read_what.append([entry[0] + '. ' + entry[1] + '.', r])
#print(catalog[:3])
#print()
#print(who_read_what[:10])
#print()
#print(len(who_read_what))
import pymysql
import statistics
connection = pymysql.connect(host='localhost',
user='root',
password='p5a1m99',
db='middletown1',
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
all_readers = []
with connection.cursor() as cursor:
sql = "SELECT cleanedPatronName, RACE, GENDER, BIRTH_YEAR, " + \
"BIRTHPLACE, FATHER_BIRTHPLACE, MOTHER_BIRTHPLACE, " + \
"MARITAL_STATUS, YEARS_MARRIED, RESIDENTIAL_STATUS, " + \
"NUM_BOARDERS, NUM_SERVANTS, YEAR_OF_IMMIGRATION, NATURALIZATION_STATUS, " + \
"PROFESSION_TITLE, PROFESSION_CLASS, occupation_group, occupation_rank, " + \
"count(*) " + \
"FROM flattenedData " + \
"WHERE TRUST_THIS_CENSUS = 1 " + \
"AND transaction_date >= '" + FROM_DATE + "' and transaction_date <= '" + TO_DATE + "' " + \
'GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18;'
cursor.execute(sql)
results = cursor.fetchall()
for r in results:
all_readers.append(['', r])
#print(catalog[:3])
#print()
#print(all_readers[:10])
#print()
#print(len(all_readers))
from collections import Counter
def report_key_count(readership_data, key, label):
total_n = 0
results = {}
for r in readership_data:
if r[1][key] not in results:
results[r[1][key]] = 0
results[r[1][key]] += r[1]['count(*)']
total_n += r[1]['count(*)']
print()
print(label, key)
print()
for w in Counter(results).most_common(10):
print('\t', w[0], '{:4.1f}'.format(float(w[1] / float(total_n) * 100.0)) + '%')
for k in all_readers[0][1].keys():
if k not in ['count(*)', 'cleanedPatronName']:
print()
print('------------------------------------------------------')
report_key_count(who_read_what, k, 'LOST CAUSE')
report_key_count(all_readers, k, 'ALL_MUNCIE')