1
1
---
2
- title : " Iowa Contributions"
3
- author : " Kiernan Nicholls"
2
+ title : " Iowa Campaign Contributions"
3
+ author : " Kiernan Nicholls & Yanqi Xu "
4
4
date : " `r Sys.time()`"
5
5
output :
6
6
github_document :
@@ -34,7 +34,7 @@ if (!interactive()) {
34
34
```
35
35
36
36
``` {r create_docs_dir, eval=FALSE, echo=FALSE, include=FALSE}
37
- fs::dir_create(here::here("ia", "contribs", "docs"))
37
+ fs::dir_create(here::here("state"," ia", "contribs", "docs"))
38
38
```
39
39
40
40
## Project
@@ -114,7 +114,7 @@ here::here()
114
114
115
115
## Data
116
116
117
- Data is obtained from the [ Iowa Ethics and Campaign Disclosure Board] [ iae ] .
117
+ Data is obtained from the [ Iowa Ethics and Campaign Disclosure Board] [ iae ] . The API returns contributions dating back to ` 2003-01-01 ` .
118
118
119
119
> In order to accomplish its Mission, the Board will enforce the provisions of
120
120
the "Campaign Disclosure Act" in Iowa Code chapter 68A, the "Government Ethics
@@ -125,12 +125,13 @@ rules in Chapter 351 of the Iowa Administrative Code.
125
125
[ iae ] : https://ethics.iowa.gov/
126
126
127
127
The Board provides the file through the [ state open data portal] [ odp ] under the
128
- title "Iowa Campaign Contributions Received." The data can be accessed as a
128
+ title [ "Iowa Campaign Contributions Received."] [ cont ] The data can be accessed as a
129
129
tabular CSV file or through a number of direct APIs.
130
130
131
- The database was created June 18, 2015 and last updated December 10, 2019 .
131
+ The database was created June 18, 2015 and last updated July 1, 2023 .
132
132
133
133
[ odp ] : https://data.iowa.gov/
134
+ [ cont ] :https://data.iowa.gov/Campaigns-Elections/Iowa-Campaign-Contributions-Received/smfg-ds7h
134
135
135
136
> This dataset contains information on contributions and in kind donations made
136
137
by organizations and individuals to state-wide, legislative or local candidate
@@ -173,7 +174,7 @@ These fixed files can be read into a single data frame with `purrr::map_df()`
173
174
and ` readr::read_delim() ` .
174
175
175
176
``` {r raw_dir}
176
- raw_dir <- dir_create(here("ia", "contribs", "data", "raw"))
177
+ raw_dir <- dir_create(here("state"," ia", "contribs", "data", "raw"))
177
178
raw_url <- "https://data.iowa.gov/api/views/smfg-ds7h/rows.csv"
178
179
raw_path <- path(raw_dir, basename(raw_url))
179
180
if (!this_file_new(raw_path)) {
@@ -187,32 +188,29 @@ iac <- vroom(
187
188
na = c("", "N/A", "NA", "n/a", "na"),
188
189
col_types = cols(
189
190
.default = col_character(),
190
- `Date` = col_date_usa (),
191
+ `Date` = col_date_mdy (),
191
192
`Contribution Amount` = col_double()
192
193
)
193
194
)
194
195
```
195
196
196
197
``` {r raw_rename, echo=FALSE}
197
198
iac <- iac %>%
198
- clean_names("snake") %>%
199
+ clean_names("snake") %>%
199
200
rename(
200
201
code = committee_code,
201
202
committee = committee_name,
202
203
type = transaction_type,
203
204
cont_comm_cd = contributing_committee_code,
204
205
cont_org = contributing_organization,
205
206
first = first_name,
206
- mi = middle_initial,
207
207
last = last_name,
208
208
addr1 = address_line_1,
209
209
addr2 = address_line_2,
210
210
zip = zip_code,
211
- amount = contribution_amount,
212
- tx = transaction_id,
211
+ amount = contribution_amount
213
212
) %>%
214
- mutate(last = coalesce(last, cont_org)) %>%
215
- select(-cont_org, -cont_comm_cd)
213
+ select(-cont_comm_cd)
216
214
```
217
215
218
216
We can ensure this file was read correctly by counting distinct values of a
@@ -268,7 +266,8 @@ iac %>%
268
266
We can create a file containing every duplicate record in the data.
269
267
270
268
``` {r dupe_write}
271
- dupe_file <- path(dirname(raw_dir), "dupes.csv")
269
+ dupe_file <- path(raw_dir, "dupes.csv")
270
+ iac <- rowid_to_column(iac, var = "tx")
272
271
if (!file_exists(dupe_file)) {
273
272
write_lines("tx,dupe_flag", dupe_file)
274
273
iac <- mutate(iac, group = str_sub(date, end = 7))
@@ -279,7 +278,7 @@ if (!file_exists(dupe_file)) {
279
278
pb <- txtProgressBar(max = length(ias), style = 3)
280
279
for (i in seq_along(ias)) {
281
280
write_csv(
282
- path = dupe_file,
281
+ file = dupe_file,
283
282
append = TRUE,
284
283
x = tibble(
285
284
tx = ia_tx[[i]],
@@ -300,7 +299,7 @@ if (!file_exists(dupe_file)) {
300
299
dupes <- read_csv(
301
300
file = dupe_file,
302
301
col_types = cols(
303
- tx = col_character (),
302
+ tx = col_integer (),
304
303
dupe_flag = col_logical()
305
304
)
306
305
)
@@ -321,6 +320,10 @@ iac %>%
321
320
arrange(date, last)
322
321
```
323
322
323
+ ``` {r}
324
+ iac <- iac %>% select(-tx)
325
+ ```
326
+
324
327
## Categorical
325
328
326
329
``` {r n_distinct}
@@ -362,7 +365,7 @@ iac %>%
362
365
filter(!is.na(amount), amount >= 1) %>%
363
366
ggplot(aes(x = type, y = amount)) +
364
367
geom_violin(aes(fill = type), adjust = 3) +
365
- scale_fill_brewer(palette = "Dark2", guide = FALSE ) +
368
+ scale_fill_brewer(palette = "Dark2", guide = "none" ) +
366
369
scale_y_continuous(
367
370
breaks = c(1 %o% 10^(0:6)),
368
371
labels = dollar,
@@ -391,7 +394,7 @@ iac %>%
391
394
geom_col(aes(fill = even)) +
392
395
scale_fill_brewer(palette = "Dark2") +
393
396
scale_y_continuous(labels = comma) +
394
- scale_x_continuous(breaks = seq(1998, 2020 , by = 2)) +
397
+ scale_x_continuous(breaks = seq(1998, 2024 , by = 2)) +
395
398
theme(legend.position = "bottom") +
396
399
labs(
397
400
title = "Iowa Contributions by Year",
@@ -672,7 +675,7 @@ glimpse(sample_n(iac, 50))
672
675
Now the file can be saved on disk for upload to the Accountability server.
673
676
674
677
``` {r clean_dir}
675
- clean_dir <- dir_create(here("ia", "contribs", "data", "clean"))
678
+ clean_dir <- dir_create(here("state"," ia", "contribs", "data", "clean"))
676
679
clean_path <- path(clean_dir, "ia_contribs_clean.csv")
677
680
write_csv(iac, clean_path, na = "")
678
681
(clean_size <- file_size(clean_path))
@@ -684,7 +687,7 @@ file_encoding(clean_path) %>%
684
687
685
688
We can use `aws.s3::put_object()` to upload the text file to the IRW server.
686
689
687
- ``` {r aws_upload, eval=TRUE }
690
+ ``` {r aws_upload, eval=FALSE }
688
691
aws_path <- path("csv", basename(clean_path))
689
692
if (!object_exists(aws_path, "publicaccountability")) {
690
693
put_object(
@@ -710,20 +713,21 @@ dict_raw <- tibble(
710
713
var = md_code(names(iac)),
711
714
type = md_code(map_chr(iac, typeof)),
712
715
def = c(
713
- "Unique transaction hash",
714
716
"Date contribution was made",
715
717
"Recipient committee code",
718
+ "Recipient committee type",
716
719
"Recipient committee name",
717
720
"Type of contribution (direct, in-kind)",
721
+ "Contributor organization",
718
722
"Contributor first name",
719
- "Contributor middle initial",
720
- "Contributor last name or organization",
723
+ "Contributor last name",
721
724
"Contributor street address",
722
725
"Contributor secondary address",
723
726
"Contributor state abbreviation",
724
727
"Contributor city name",
725
728
"Contributor ZIP+4 code",
726
729
"Amount or correction",
730
+ "Check number",
727
731
"Flag for missing value",
728
732
"Flag for duplicate row",
729
733
"Calendar year contribution made",
0 commit comments