-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPct_Cardholders_single_map_with_baselayer.py
More file actions
203 lines (176 loc) · 11.9 KB
/
Pct_Cardholders_single_map_with_baselayer.py
File metadata and controls
203 lines (176 loc) · 11.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
'''
Produces a choropleth map of the percentage of the population in each block group within a town that has a library card.
Script will need two files from the census bureau to accomplish that.
The tigerline shapefile for the block groups encompassing at least the specified geographic area (tl_2019_25_bg.zip) which can be obtained from https://www.census.gov/cgi-bin/geo/shapefiles/index.php
The estimated population table for the block groups encompassing the specified geographic area (2019 acs pop estimate bg.csv) which can be obtained from https://data.census.gov/cedsci/
'''
import json
import pandas as pd
import geopandas as gpd
import plotly.io as pio
import psycopg2
import configparser
import plotly.graph_objects as go
def runquery(tracts):
'''
function takes a list of census tract ids as a means of filtering the given sql query
Query returns a number of stats grouped by the block group of all patrons within the specified tracts
The key stat in the output, which will be used for the choropleth map later is the total_patron value.
Other values are simply additional field that will be included in the hover tooltip within the final map.
Query results are output as as pandas dataframe.
'''
# import configuration file containing our connection string
# app.ini looks like the following
#[db]
#connection_string = dbname='iii' user='PUT_USERNAME_HERE' host='sierra-db.library-name.org' password='PUT_PASSWORD_HERE' port=ENTER_PORT_NUMBER_HERE
config = configparser.ConfigParser()
config.read('app_info.ini')
query = """\
SELECT
CASE
WHEN v.field_content IS NULL THEN 'no data'
WHEN v.field_content = '' THEN v.field_content
ELSE SUBSTRING(REGEXP_REPLACE(v.field_content,'\|(s|c|t|b)','','g'),1,12)
END AS geoid,
COUNT(DISTINCT p.id) AS total_patrons,
SUM(p.checkout_total) AS total_checkouts,
SUM(p.renewal_total) AS total_renewals,
SUM(p.checkout_total + p.renewal_total) AS total_circ,
SUM(p.checkout_count) AS total_checkouts_current,
COUNT(DISTINCT h.id) AS total_holds_current,
ROUND(AVG(DATE_PART('year',AGE(CURRENT_DATE,p.birth_date_gmt::DATE)))) AS avg_age,
COUNT(DISTINCT p.id) FILTER(WHERE rm.creation_date_gmt::DATE >= NOW()::DATE - INTERVAL '1 year') AS total_new_patrons,
COUNT(DISTINCT p.id) FILTER(WHERE p.activity_gmt::DATE >= NOW()::DATE - INTERVAL '1 year') AS total_active_patrons,
ROUND(100.0 * (CAST(COUNT(DISTINCT p.id) FILTER(WHERE p.activity_gmt::DATE >= NOW()::DATE - INTERVAL '1 year') AS NUMERIC (12,2))) / CAST(COUNT(DISTINCT p.id) AS NUMERIC (12,2)), 2)::VARCHAR AS pct_active,
COUNT(DISTINCT p.id) FILTER(WHERE ((p.mblock_code != '-') OR (p.owed_amt >= 10))) as total_blocked_patrons,
ROUND(100.0 * (CAST(COUNT(DISTINCT p.id) FILTER(WHERE ((p.mblock_code != '-') OR (p.owed_amt >= 10))) as numeric (12,2)) / cast(COUNT(DISTINCT p.id) as numeric (12,2))),2)::VARCHAR AS pct_blocked,
ROUND((100.0 * SUM(p.checkout_total))/(100.0 *COUNT(DISTINCT p.id)),2)::VARCHAR AS checkouts_per_patron,
CASE
WHEN v.field_content IS NULL OR v.field_content = '' THEN 'na'
ELSE 'https://censusreporter.org/profiles/15000US'||SUBSTRING(REGEXP_REPLACE(v.field_content,'\|(s|c|t|b)','','g'),1,12)
END AS census_reporter_url
FROM sierra_view.patron_record p
JOIN sierra_view.patron_record_address a
ON p.id = a.patron_record_id AND a.patron_record_address_type_id = '1'
JOIN sierra_view.record_metadata rm
ON p.id = rm.id
LEFT JOIN sierra_view.hold h
ON p.id = h.patron_record_id
LEFT JOIN sierra_view.varfield v
ON v.record_id = p.id AND v.varfield_type_code = 'k' AND v.field_content ~ '^\|s\d{2}'
WHERE SUBSTRING(REGEXP_REPLACE(v.field_content,'\|(s|c|t|b)','','g'),6,6) IN ("""\
+tracts+"""\
)
GROUP BY 1,15
--HAVING COUNT(DISTINCT p.id) >= 80
ORDER BY 2 DESC
"""
try:
# variable connection string should be defined in the imported config file
conn = psycopg2.connect( config['db']['connection_string'] )
except:
print("unable to connect to the database")
clear_connection()
return
#Opening a session and querying the database for weekly new items
cursor = conn.cursor()
cursor.execute(query)
#For now, just storing the data in a variable. We'll use it later.
rows = cursor.fetchall()
conn.close()
#convert query results to Pandas data frame
column_names = ["geoid", "total_patrons", "total_checkouts","total_renewals","total_circ","total_checkouts_current","total_holds_current","avg_age","total_new_patrons","total_active_patrons","pct_active","total_blocked_patrons","pct_blocked","checkouts_per_patron","census_reporter_url"]
df = pd.DataFrame(rows, columns=column_names)
return df
def gen_map(library,patron_df,lat,lon,mapzoom):
'''
function produces plotly choropleth map based on query results from the runquery function
Pandas is used to merge the queryresults data frame with the tigerline shape file and estimated population table from the Census Bureau
Completed map is exported as a .html file
'''
#load shapefile into Pandas dataframe df
zipfile = "zip://Data Sources/tl_2019_25_bg.zip"
df = gpd.read_file(zipfile).to_crs("EPSG:4326")
df.columns= df.columns.str.lower()
#Merge shapefile dataframe df with the runquery results
df = df.merge(patron_df, on='geoid', how='inner')
#load population estimate into a data frame and merge it with the df dataframe
pop_df = pd.read_csv("/Data Sources/2019 acs pop estimate bg.csv", dtype={'geoid':str})
df = df.merge(pop_df, on='geoid', how='inner')
#Add a calculated field to the df for the percent of population that are cardholders and format the field to 2 decimal places
df['pct_cardholders'] = df.total_patrons / df.estimated_population * 100.00
df['pct_cardholders'] = df['pct_cardholders'].round(decimals=2)
#create a json form of the dataframe in order to pass it into the Choroplethmapbox function in Plotly
zipjson = json.loads(df.to_json())
#Produce Choropleth map via Plotly
'''
z field will determine the value used for shading each region of the map.
Full list of colorscale options available here https://plotly.com/python/builtin-colorscales/
hovertemplate used to change the fields and labels from the dataframe that will display in the hover tooltip box over each region of the map
zmin/max will allow you to override the default scale to help make adjustments if there is any anomolous data that may throw the scale off by forcing the min and max values to be used
'''
fig1 = go.Choroplethmapbox(geojson=zipjson, locations=df.geoid, featureidkey="properties.geoid", z=df.pct_cardholders,
colorscale="YlGnBu",
hovertemplate="<b>" + df.geographic_area_name + "</b><br>" +
"Cardholder %: " + df.pct_cardholders.astype(str) + "%" + "</b><br>" +
"Patron Total: " + df.total_patrons.astype(str) + "</b><br>" +
"Est. Pop: " + df.estimated_population.astype(str) + "</b><br>" +
"Total checkouts: " + df.total_checkouts.astype(str) + "</b><br>" +
"Active Patron %: " + df.pct_active.astype(str) + "%" + "</b><br>" +
"Blocked Patron %: " + df.pct_blocked.astype(str) + "%" + "<extra></extra>",
#zmin=0, zmax=1,
marker_opacity=0.50, marker_line_width=1, showlegend=False, showscale=True)
#Create a streetmap baselayer and overlay our produced map with it
fig = go.Figure(fig1)
fig.add_trace(fig1)
#baselayer street map comes from open street maps using the specified latitude/longitude center point and zoom level
fig.update_layout(mapbox_style="open-street-map",mapbox_zoom=mapzoom,mapbox_center={"lat": lat, "lon": lon},title = "Cardholder Percentage", legend_title="")
#Write resulting map to html file
pio.write_html(fig, file=library+'CardholderPct.html', auto_open=True)
def main(library,tracts,lat,lon,mapzoom):
query_results = runquery(tracts)
gen_map(library,query_results,lat,lon,mapzoom)
'''
Run for any number of location by calling the main function with the required variables
City/Town used for file naming
list of census tracts in that town to filter query on
latitude used for street map
longitude used for street map
zoom level used for street map
'''
#main('Acton',"'363102','363103','363104','363201','363202'")
#main('Arlington',"'356100','356200','356300','356400','356500','356601','356602','356701','356702','356703','356704'")
#main('Ashland',"'385100','385201','385202'")
#main('Bedford',"'359100','359300'")
#main('Belmont',"'357100','357200','357300','357400','357500','357600','357700','357800'")
#main('Brookline',"'400100','400200','400201','400300','400400','400500','400600','400700','400800','400900','401000','401100','401200'")
#main('Cambridge',"'352101','352102','352200','352300','352400','352500','352600','352700','352800','352900','353000','353101','353102','353200','353300','353400','353500','353600','353700','353800','353900','354000','354100','354200','354300','354400','354500','354600','354700','354800','354900','355000'")
#main('Concord',"'361100','361200','361300'")
#main('Dedham',"'402101','402102','402200','402300','402400','402500'")
#main('Dover',"'405100'")
#main('Framingham Public',"'383101','383102','383200','383300','383400','383501','383502','383600','383700','383800','383901','383902','384001','384002'")
#main('Franklin',"'442101','442102','442103','442201','442202'")
#main('Holliston',"'387100','387201','387202'")
#main('Lexington',"'358100','358200','358300','358400','358500','358600','358700'")
#main('Lincoln',"'360100','360200','360300'")
#main('Maynard',"'364101','364102'")
#main('Medfield',"'406101','406102'")
#main('Medford',"'339100','339200','339300','339400','339500','339600','339700','339801','339802','339900','340000','340100'")
#main('Medway',"'408101','408102'")
#main('Millis',"'407100'")
#main('Natick',"'382100','382200','382300','382400','382500','382601','382602'")
#main('Needham',"'403100','403300','403400','403500','457200'")
#main('Newton',"'373100','373200','373300','373400','373500','373600','373700','373800','373900','374000','374100','374200','374300','374400','374500','374600','374700','374800'")
#main('Norwood',"'413100','413200','413300','413401','413402','413500'")
#main('Sherborn',"'386100'")
#main('Somerville',"'350103','350104','350200','350300','350400','350500','350600','350700','350800','350900','351000','351100','351203','351204','351300','351403','351404','351500'")
#main('Stow',"'323100','980000'",42.4283,-71.5117,11)
#main('Sudbury',"'365100','365201','365202'",42.3890,-71.4225,11)
#main('Wayland',"'366100','366201','366202'")
#main('Waltham',"'368101','368102','368200','368300','368400','368500','368600','368700','368800','368901','368902','369000','369100'")
#main('Watertown',"'370101','370102','370104','370201','370202','370300','370301','370400','370401'")
#main('Wellesley',"'404100','404201','404202','404301','404302','404400'",42.2989,-71.2786,11)
#main('Weston',"'367100','367200'")
#main('Westwood',"'412100','412200','412300'",42.2210,-71.1985,11)
#main('Winchester',"'338100','338200','338300','338400','338500'")
#main('Woburn',"'333100','333200','333300','333400','333501','333502','333600'")