Patron-Maps/Pct_Cardholders_single_map_with_baselayer.py at main · Minuteman-Library-Network/Patron-Maps · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
'''
Produces a choropleth map of the percentage of the population in each block group within a town that has a library card.
Script will need two files from the census bureau to accomplish that.

The tigerline shapefile for the block groups encompassing at least the specified geographic area (tl_2019_25_bg.zip) which can be obtained from https://www.census.gov/cgi-bin/geo/shapefiles/index.php
The estimated population table for the block groups encompassing the specified geographic area (2019 acs pop estimate bg.csv) which can be obtained from https://data.census.gov/cedsci/
'''

import json
import pandas as pd
import geopandas as gpd
import plotly.io as pio
import psycopg2
import configparser
import plotly.graph_objects as go

def runquery(tracts):

    '''
    function takes a list of census tract ids as a means of filtering the given sql query
    Query returns a number of stats grouped by the block group of all patrons within the specified tracts
    The key stat in the output, which will be used for the choropleth map later is the total_patron value.
    Other values are simply additional field that will be included in the hover tooltip within the final map.
    Query results are output as as pandas dataframe.
    '''

    # import configuration file containing our connection string
    # app.ini looks like the following
    #[db]
    #connection_string = dbname='iii' user='PUT_USERNAME_HERE' host='sierra-db.library-name.org' password='PUT_PASSWORD_HERE' port=ENTER_PORT_NUMBER_HERE

    config = configparser.ConfigParser()
    config.read('app_info.ini')

    query = """\
            SELECT
            CASE
	            WHEN v.field_content IS NULL THEN 'no data'
	            WHEN v.field_content = '' THEN v.field_content
	            ELSE SUBSTRING(REGEXP_REPLACE(v.field_content,'\|(s|c|t|b)','','g'),1,12)
            END AS geoid,
            COUNT(DISTINCT p.id) AS total_patrons,
            SUM(p.checkout_total) AS total_checkouts,
            SUM(p.renewal_total) AS total_renewals,
            SUM(p.checkout_total + p.renewal_total) AS total_circ,
            SUM(p.checkout_count) AS total_checkouts_current,
            COUNT(DISTINCT h.id) AS total_holds_current,
            ROUND(AVG(DATE_PART('year',AGE(CURRENT_DATE,p.birth_date_gmt::DATE)))) AS avg_age,
            COUNT(DISTINCT p.id) FILTER(WHERE rm.creation_date_gmt::DATE >= NOW()::DATE - INTERVAL '1 year') AS total_new_patrons,
            COUNT(DISTINCT p.id) FILTER(WHERE p.activity_gmt::DATE >= NOW()::DATE - INTERVAL '1 year') AS total_active_patrons,
            ROUND(100.0 * (CAST(COUNT(DISTINCT p.id) FILTER(WHERE p.activity_gmt::DATE >= NOW()::DATE - INTERVAL '1 year') AS NUMERIC (12,2))) / CAST(COUNT(DISTINCT p.id) AS NUMERIC (12,2)), 2)::VARCHAR AS pct_active,
            COUNT(DISTINCT p.id) FILTER(WHERE ((p.mblock_code != '-') OR (p.owed_amt >= 10))) as total_blocked_patrons,
            ROUND(100.0 * (CAST(COUNT(DISTINCT p.id) FILTER(WHERE ((p.mblock_code != '-') OR (p.owed_amt >= 10))) as numeric (12,2)) / cast(COUNT(DISTINCT p.id) as numeric (12,2))),2)::VARCHAR AS pct_blocked,
            ROUND((100.0 * SUM(p.checkout_total))/(100.0 *COUNT(DISTINCT p.id)),2)::VARCHAR AS checkouts_per_patron,
            CASE
	            WHEN v.field_content IS NULL OR v.field_content = '' THEN 'na'
	            ELSE 'https://censusreporter.org/profiles/15000US'||SUBSTRING(REGEXP_REPLACE(v.field_content,'\|(s|c|t|b)','','g'),1,12)
            END AS census_reporter_url
            FROM sierra_view.patron_record p
            JOIN sierra_view.patron_record_address a
            ON p.id = a.patron_record_id AND a.patron_record_address_type_id = '1'
            JOIN sierra_view.record_metadata rm
            ON p.id = rm.id
            LEFT JOIN sierra_view.hold h
            ON p.id = h.patron_record_id
            LEFT JOIN sierra_view.varfield v
            ON v.record_id = p.id AND v.varfield_type_code = 'k' AND v.field_content ~ '^\|s\d{2}'
            WHERE SUBSTRING(REGEXP_REPLACE(v.field_content,'\|(s|c|t|b)','','g'),6,6) IN ("""\
            +tracts+"""\
            )
            GROUP BY 1,15
            --HAVING COUNT(DISTINCT p.id) >= 80
            ORDER BY 2 DESC
            """

    try:
	    # variable connection string should be defined in the imported config file
        conn = psycopg2.connect( config['db']['connection_string'] )
    except:
        print("unable to connect to the database")
        clear_connection()
        return

    #Opening a session and querying the database for weekly new items
    cursor = conn.cursor()
    cursor.execute(query)
    #For now, just storing the data in a variable. We'll use it later.
    rows = cursor.fetchall()
    conn.close()

    #convert query results to Pandas data frame
    column_names = ["geoid", "total_patrons", "total_checkouts","total_renewals","total_circ","total_checkouts_current","total_holds_current","avg_age","total_new_patrons","total_active_patrons","pct_active","total_blocked_patrons","pct_blocked","checkouts_per_patron","census_reporter_url"]
    df = pd.DataFrame(rows, columns=column_names)

    return df

def gen_map(library,patron_df,lat,lon,mapzoom):
    '''
    function produces plotly choropleth map based on query results from the runquery function

    Pandas is used to merge the queryresults data frame with the tigerline shape file and estimated population table from the Census Bureau
    Completed map is exported as a .html file
    '''

    #load shapefile into Pandas dataframe df
    zipfile = "zip://Data Sources/tl_2019_25_bg.zip"
    df = gpd.read_file(zipfile).to_crs("EPSG:4326")
    df.columns= df.columns.str.lower()

    #Merge shapefile dataframe df with the runquery results
    df = df.merge(patron_df, on='geoid', how='inner')

    #load population estimate into a data frame and merge it with the df dataframe
    pop_df = pd.read_csv("/Data Sources/2019 acs pop estimate bg.csv", dtype={'geoid':str})
    df = df.merge(pop_df, on='geoid', how='inner')

    #Add a calculated field to the df for the percent of population that are cardholders and format the field to 2 decimal places
    df['pct_cardholders'] = df.total_patrons / df.estimated_population * 100.00
    df['pct_cardholders'] = df['pct_cardholders'].round(decimals=2)

    #create a json form of the dataframe in order to pass it into the Choroplethmapbox function in Plotly
    zipjson = json.loads(df.to_json())

    #Produce Choropleth map via Plotly
    '''
    z field will determine the value used for shading each region of the map.
    Full list of colorscale options available here https://plotly.com/python/builtin-colorscales/
    hovertemplate used to change the fields and labels from the dataframe that will display in the hover tooltip box over each region of the map
    zmin/max will allow you to override the default scale to help make adjustments if there is any anomolous data that may throw the scale off by forcing the min and max values to be used
    '''
    fig1 = go.Choroplethmapbox(geojson=zipjson, locations=df.geoid, featureidkey="properties.geoid", z=df.pct_cardholders,
                           colorscale="YlGnBu",
                           hovertemplate="<b>" + df.geographic_area_name + "</b><br>" +
                                         "Cardholder %: " + df.pct_cardholders.astype(str) + "%" + "</b><br>" +
                                         "Patron Total: " + df.total_patrons.astype(str) + "</b><br>" +
                                         "Est. Pop: " + df.estimated_population.astype(str) + "</b><br>" +
                                         "Total checkouts: " + df.total_checkouts.astype(str) + "</b><br>" +
                                         "Active Patron %: " + df.pct_active.astype(str) + "%" + "</b><br>" +
                                         "Blocked Patron %: " + df.pct_blocked.astype(str) + "%" + "<extra></extra>",
                           #zmin=0, zmax=1,
                           marker_opacity=0.50, marker_line_width=1, showlegend=False, showscale=True)

    #Create a streetmap baselayer and overlay our produced map with it
    fig = go.Figure(fig1)
    fig.add_trace(fig1)
    #baselayer street map comes from open street maps using the specified latitude/longitude center point and zoom level
    fig.update_layout(mapbox_style="open-street-map",mapbox_zoom=mapzoom,mapbox_center={"lat": lat, "lon": lon},title = "Cardholder Percentage", legend_title="")

    #Write resulting map to html file
    pio.write_html(fig, file=library+'CardholderPct.html', auto_open=True)

def main(library,tracts,lat,lon,mapzoom):
    query_results = runquery(tracts)

    gen_map(library,query_results,lat,lon,mapzoom)

'''
Run for any number of location by calling the main function with the required variables
City/Town used for file naming
list of census tracts in that town to filter query on
latitude used for street map
longitude used for street map
zoom level used for street map
'''
#main('Acton',"'363102','363103','363104','363201','363202'")
#main('Arlington',"'356100','356200','356300','356400','356500','356601','356602','356701','356702','356703','356704'")
#main('Ashland',"'385100','385201','385202'")
#main('Bedford',"'359100','359300'")
#main('Belmont',"'357100','357200','357300','357400','357500','357600','357700','357800'")
#main('Brookline',"'400100','400200','400201','400300','400400','400500','400600','400700','400800','400900','401000','401100','401200'")
#main('Cambridge',"'352101','352102','352200','352300','352400','352500','352600','352700','352800','352900','353000','353101','353102','353200','353300','353400','353500','353600','353700','353800','353900','354000','354100','354200','354300','354400','354500','354600','354700','354800','354900','355000'")
#main('Concord',"'361100','361200','361300'")
#main('Dedham',"'402101','402102','402200','402300','402400','402500'")
#main('Dover',"'405100'")
#main('Framingham Public',"'383101','383102','383200','383300','383400','383501','383502','383600','383700','383800','383901','383902','384001','384002'")
#main('Franklin',"'442101','442102','442103','442201','442202'")
#main('Holliston',"'387100','387201','387202'")
#main('Lexington',"'358100','358200','358300','358400','358500','358600','358700'")
#main('Lincoln',"'360100','360200','360300'")
#main('Maynard',"'364101','364102'")
#main('Medfield',"'406101','406102'")
#main('Medford',"'339100','339200','339300','339400','339500','339600','339700','339801','339802','339900','340000','340100'")
#main('Medway',"'408101','408102'")
#main('Millis',"'407100'")
#main('Natick',"'382100','382200','382300','382400','382500','382601','382602'")
#main('Needham',"'403100','403300','403400','403500','457200'")
#main('Newton',"'373100','373200','373300','373400','373500','373600','373700','373800','373900','374000','374100','374200','374300','374400','374500','374600','374700','374800'")
#main('Norwood',"'413100','413200','413300','413401','413402','413500'")
#main('Sherborn',"'386100'")
#main('Somerville',"'350103','350104','350200','350300','350400','350500','350600','350700','350800','350900','351000','351100','351203','351204','351300','351403','351404','351500'")
#main('Stow',"'323100','980000'",42.4283,-71.5117,11)
#main('Sudbury',"'365100','365201','365202'",42.3890,-71.4225,11)
#main('Wayland',"'366100','366201','366202'")
#main('Waltham',"'368101','368102','368200','368300','368400','368500','368600','368700','368800','368901','368902','369000','369100'")
#main('Watertown',"'370101','370102','370104','370201','370202','370300','370301','370400','370401'")
#main('Wellesley',"'404100','404201','404202','404301','404302','404400'",42.2989,-71.2786,11)
#main('Weston',"'367100','367200'")
#main('Westwood',"'412100','412200','412300'",42.2210,-71.1985,11)
#main('Winchester',"'338100','338200','338300','338400','338500'")
#main('Woburn',"'333100','333200','333300','333400','333501','333502','333600'")