Skip to content

Commit 4a18dfb

Browse files
committed
PA contribs, IA contribs, OH lob
1 parent 649c03b commit 4a18dfb

21 files changed

+1293
-786
lines changed
Binary file not shown.

state/fl/lobbying/reg/docs/fl_lobby_reg_diary.Rmd

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
title: "Florida Lobbyist Registration"
3-
author: "Kiernan Nicholls"
3+
author: "Kiernan Nicholls & Yanqi Xu"
44
date: "`r Sys.time()`"
55
output:
66
github_document:
@@ -32,7 +32,7 @@ set.seed(5)
3232
```
3333

3434
```{r create_docs_dir, eval=FALSE, echo=FALSE, include=FALSE}
35-
fs::dir_create(here::here("fl", "lobbying", "reg", "docs"))
35+
fs::dir_create(here::here("state","fl", "lobbying", "reg", "docs"))
3636
```
3737

3838
## Project
@@ -118,7 +118,7 @@ to view the file format. The datasets are updated daily.
118118

119119
```{r download_key}
120120
key_url <- "https://floridalobbyist.gov/reports/disk%20file%20definition.pdf?cp=0.3379601757893852"
121-
download.file(key_url, destfile = url2path(key_url, here("fl", "lobbying", "reg", "docs")))
121+
download.file(key_url, destfile = url2path(key_url, here("state","fl", "lobbying", "reg", "docs")))
122122
```
123123

124124
## Import
@@ -136,7 +136,7 @@ processor, spreadsheet, or database program.
136136
We can download these two files to our raw directory.
137137

138138
```{r create_raw_dir}
139-
raw_dir <- here("fl", "lobbying", "reg", "data", "raw")
139+
raw_dir <- here("state","fl", "lobbying", "reg", "data", "raw")
140140
dir_create(raw_dir)
141141
```
142142

@@ -165,8 +165,8 @@ fllr <- vroom(
165165
skip = 2,
166166
col_types = cols(
167167
.default = col_character(),
168-
`Eff Date` = col_date_usa(),
169-
`WD Date` = col_date_usa(),
168+
`Eff Date` = col_date_mdy(),
169+
`WD Date` = col_date_mdy(),
170170
)
171171
)
172172
```

state/hi/lobbying/docs/hi_lobby_diary.Rmd

Lines changed: 10 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
title: "Hawaii Lobbyists"
3-
author: "Kiernan Nicholls"
3+
author: "Kiernan Nicholls & Yanqi Xu"
44
date: "`r Sys.time()`"
55
output:
66
github_document:
@@ -32,7 +32,7 @@ set.seed(5)
3232
```
3333

3434
```{r create_docs_dir, eval=FALSE, echo=FALSE, include=FALSE}
35-
fs::dir_create(here::here("hi", "lobbying", "docs"))
35+
fs::dir_create(here::here("state","hi", "lobbying", "docs"))
3636
```
3737

3838
## Project
@@ -107,94 +107,31 @@ here::here()
107107

108108
## Data
109109

110-
Data can be obtained by the [Hawaii State Ethics Commission][hec] via their [Socrata portal][hsp].
110+
Data can be obtained from the [Hawaii State Ethics Commission][hec] via their [Socrata portal][hsp]. The [search tool][download] lets us export all lobbying registration data from 2019-2023 in CSV format.
111111

112112
[hec]: https://ethics.hawaii.gov/
113113
[hsp]: https://data.hawaii.gov/
114+
[download]:https://hawaiiethics.my.site.com/public/s/reports?report=Lobbyist%20Registrations
114115

115-
```{r meta_read, echo=FALSE}
116-
hi_meta <- fromJSON("https://data.hawaii.gov/api/views/gdxe-t5ff")
117-
```
118-
119-
The relavent file is named `r quote(hi_meta$name)` with the ID of `r md_code(hi_meta$id)`. The file
120-
was created at `r as_datetime(hi_meta$createdAt)`.
121-
122-
There are `r nrow(hi_meta$columns)` columns in the database.
123-
124-
```{r meta_cols}
125-
hi_meta %>%
126-
use_series("columns") %>%
127-
as_tibble() %>%
128-
select(
129-
position,
130-
fieldName,
131-
name,
132-
dataTypeName
133-
) %>%
134-
mutate(fieldName = md_code(fieldName)) %>%
135-
kable(col.names = c("col", "variable", "name", "type"))
136-
```
137116

138-
This data does _not_ include the mailing addresses of the lobbyists and principal organizations.
139-
After contacting the Ethics Commission, I was provided with an Excel file containing the additional
140-
mailing address variable; this data will be processed and added to the site.
141117

142118
## Import
143119

144120
If the file containing addresses is found on disk, the wrangling will continue; otherwise, the
145121
raw file will be read from the portal and not wrangled any further.
146122

147123
```{r raw_dir}
148-
raw_dir <- here("hi", "lobbying", "data", "raw")
124+
raw_dir <- here("state","hi", "lobbying", "data", "raw")
149125
dir_create(raw_dir)
150126
```
151127

152-
```{r raw_read}
153-
geo_file <- dir_ls(raw_dir, type = "file", glob = "*.xlsx")
154-
raw_file <- "https://data.hawaii.gov/api/views/gdxe-t5ff/rows.csv"
155-
156-
if (file_exists(geo_file)) {
157-
# read the excel file
158-
hilr <-
159-
read_csv(
160-
file = format_csv(read_excel(geo_file)),
161-
skip = 1,
162-
col_names = c(
163-
"reg_name", "lob_name", "org_name", "lob_firm", "date_filed", "status",
164-
"date_reg", "date_term", "lob_email", "lob_phone", "lob_ext", "lob_geo",
165-
"lob_city", "lob_state", "lob_zip"
166-
),
167-
col_types = cols(
168-
.default = col_character(),
169-
date_filed = col_date_usa(),
170-
date_reg = col_date_usa(),
171-
date_term = col_date_usa()
172-
)
173-
)
174-
} else {
175-
# read the portal file
176-
hilr <-
177-
read_csv(
178-
file = raw_file,
179-
col_types = cols(
180-
Registration = col_date_usa(),
181-
Termination = col_date_usa()
182-
)
183-
) %>%
184-
# rename, reorder, and clean
185-
rename(
186-
lob_name = `Lobbyist Name`,
187-
org_name = `Organization Name`,
188-
session = `Lobby Year`
189-
) %>%
190-
select(-View, View) %>%
191-
clean_names() %>%
192-
mutate(view = str_extract(view, "(?<=\\()(.*)(?=\\))"))
193-
# stop the document
194-
knit_exit()
195-
}
128+
```{r}
129+
hilr <- read_csv(dir_ls(raw_dir, regex = ".+registration.+"))
130+
131+
hilr <- hilr %>% clean_names()
196132
```
197133

134+
198135
## Wrangle
199136

200137
### Phone

state/ia/contribs/docs/ia_contribs_diary.Rmd

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
2-
title: "Iowa Contributions"
3-
author: "Kiernan Nicholls"
2+
title: "Iowa Campaign Contributions"
3+
author: "Kiernan Nicholls & Yanqi Xu"
44
date: "`r Sys.time()`"
55
output:
66
github_document:
@@ -34,7 +34,7 @@ if (!interactive()) {
3434
```
3535

3636
```{r create_docs_dir, eval=FALSE, echo=FALSE, include=FALSE}
37-
fs::dir_create(here::here("ia", "contribs", "docs"))
37+
fs::dir_create(here::here("state","ia", "contribs", "docs"))
3838
```
3939

4040
## Project
@@ -114,7 +114,7 @@ here::here()
114114

115115
## Data
116116

117-
Data is obtained from the [Iowa Ethics and Campaign Disclosure Board][iae].
117+
Data is obtained from the [Iowa Ethics and Campaign Disclosure Board][iae]. The API returns contributions dating back to `2003-01-01`.
118118

119119
> In order to accomplish its Mission, the Board will enforce the provisions of
120120
the "Campaign Disclosure Act" in Iowa Code chapter 68A, the "Government Ethics
@@ -125,12 +125,13 @@ rules in Chapter 351 of the Iowa Administrative Code.
125125
[iae]: https://ethics.iowa.gov/
126126

127127
The Board provides the file through the [state open data portal][odp] under the
128-
title "Iowa Campaign Contributions Received." The data can be accessed as a
128+
title ["Iowa Campaign Contributions Received."][cont] The data can be accessed as a
129129
tabular CSV file or through a number of direct APIs.
130130

131-
The database was created June 18, 2015 and last updated December 10, 2019.
131+
The database was created June 18, 2015 and last updated July 1, 2023.
132132

133133
[odp]: https://data.iowa.gov/
134+
[cont]:https://data.iowa.gov/Campaigns-Elections/Iowa-Campaign-Contributions-Received/smfg-ds7h
134135

135136
> This dataset contains information on contributions and in kind donations made
136137
by organizations and individuals to state-wide, legislative or local candidate
@@ -173,7 +174,7 @@ These fixed files can be read into a single data frame with `purrr::map_df()`
173174
and `readr::read_delim()`.
174175

175176
```{r raw_dir}
176-
raw_dir <- dir_create(here("ia", "contribs", "data", "raw"))
177+
raw_dir <- dir_create(here("state","ia", "contribs", "data", "raw"))
177178
raw_url <- "https://data.iowa.gov/api/views/smfg-ds7h/rows.csv"
178179
raw_path <- path(raw_dir, basename(raw_url))
179180
if (!this_file_new(raw_path)) {
@@ -187,32 +188,29 @@ iac <- vroom(
187188
na = c("", "N/A", "NA", "n/a", "na"),
188189
col_types = cols(
189190
.default = col_character(),
190-
`Date` = col_date_usa(),
191+
`Date` = col_date_mdy(),
191192
`Contribution Amount` = col_double()
192193
)
193194
)
194195
```
195196

196197
```{r raw_rename, echo=FALSE}
197198
iac <- iac %>%
198-
clean_names("snake") %>%
199+
clean_names("snake") %>%
199200
rename(
200201
code = committee_code,
201202
committee = committee_name,
202203
type = transaction_type,
203204
cont_comm_cd = contributing_committee_code,
204205
cont_org = contributing_organization,
205206
first = first_name,
206-
mi = middle_initial,
207207
last = last_name,
208208
addr1 = address_line_1,
209209
addr2 = address_line_2,
210210
zip = zip_code,
211-
amount = contribution_amount,
212-
tx = transaction_id,
211+
amount = contribution_amount
213212
) %>%
214-
mutate(last = coalesce(last, cont_org)) %>%
215-
select(-cont_org, -cont_comm_cd)
213+
select(-cont_comm_cd)
216214
```
217215

218216
We can ensure this file was read correctly by counting distinct values of a
@@ -268,7 +266,8 @@ iac %>%
268266
We can create a file containing every duplicate record in the data.
269267

270268
```{r dupe_write}
271-
dupe_file <- path(dirname(raw_dir), "dupes.csv")
269+
dupe_file <- path(raw_dir, "dupes.csv")
270+
iac <- rowid_to_column(iac, var = "tx")
272271
if (!file_exists(dupe_file)) {
273272
write_lines("tx,dupe_flag", dupe_file)
274273
iac <- mutate(iac, group = str_sub(date, end = 7))
@@ -279,7 +278,7 @@ if (!file_exists(dupe_file)) {
279278
pb <- txtProgressBar(max = length(ias), style = 3)
280279
for (i in seq_along(ias)) {
281280
write_csv(
282-
path = dupe_file,
281+
file = dupe_file,
283282
append = TRUE,
284283
x = tibble(
285284
tx = ia_tx[[i]],
@@ -300,7 +299,7 @@ if (!file_exists(dupe_file)) {
300299
dupes <- read_csv(
301300
file = dupe_file,
302301
col_types = cols(
303-
tx = col_character(),
302+
tx = col_integer(),
304303
dupe_flag = col_logical()
305304
)
306305
)
@@ -321,6 +320,10 @@ iac %>%
321320
arrange(date, last)
322321
```
323322

323+
```{r}
324+
iac <- iac %>% select(-tx)
325+
```
326+
324327
## Categorical
325328

326329
```{r n_distinct}
@@ -362,7 +365,7 @@ iac %>%
362365
filter(!is.na(amount), amount >= 1) %>%
363366
ggplot(aes(x = type, y = amount)) +
364367
geom_violin(aes(fill = type), adjust = 3) +
365-
scale_fill_brewer(palette = "Dark2", guide = FALSE) +
368+
scale_fill_brewer(palette = "Dark2", guide = "none") +
366369
scale_y_continuous(
367370
breaks = c(1 %o% 10^(0:6)),
368371
labels = dollar,
@@ -391,7 +394,7 @@ iac %>%
391394
geom_col(aes(fill = even)) +
392395
scale_fill_brewer(palette = "Dark2") +
393396
scale_y_continuous(labels = comma) +
394-
scale_x_continuous(breaks = seq(1998, 2020, by = 2)) +
397+
scale_x_continuous(breaks = seq(1998, 2024, by = 2)) +
395398
theme(legend.position = "bottom") +
396399
labs(
397400
title = "Iowa Contributions by Year",
@@ -672,7 +675,7 @@ glimpse(sample_n(iac, 50))
672675
Now the file can be saved on disk for upload to the Accountability server.
673676

674677
```{r clean_dir}
675-
clean_dir <- dir_create(here("ia", "contribs", "data", "clean"))
678+
clean_dir <- dir_create(here("state","ia", "contribs", "data", "clean"))
676679
clean_path <- path(clean_dir, "ia_contribs_clean.csv")
677680
write_csv(iac, clean_path, na = "")
678681
(clean_size <- file_size(clean_path))
@@ -684,7 +687,7 @@ file_encoding(clean_path) %>%
684687

685688
We can use `aws.s3::put_object()` to upload the text file to the IRW server.
686689

687-
```{r aws_upload, eval=TRUE}
690+
```{r aws_upload, eval=FALSE}
688691
aws_path <- path("csv", basename(clean_path))
689692
if (!object_exists(aws_path, "publicaccountability")) {
690693
put_object(
@@ -710,20 +713,21 @@ dict_raw <- tibble(
710713
var = md_code(names(iac)),
711714
type = md_code(map_chr(iac, typeof)),
712715
def = c(
713-
"Unique transaction hash",
714716
"Date contribution was made",
715717
"Recipient committee code",
718+
"Recipient committee type",
716719
"Recipient committee name",
717720
"Type of contribution (direct, in-kind)",
721+
"Contributor organization",
718722
"Contributor first name",
719-
"Contributor middle initial",
720-
"Contributor last name or organization",
723+
"Contributor last name",
721724
"Contributor street address",
722725
"Contributor secondary address",
723726
"Contributor state abbreviation",
724727
"Contributor city name",
725728
"Contributor ZIP+4 code",
726729
"Amount or correction",
730+
"Check number",
727731
"Flag for missing value",
728732
"Flag for duplicate row",
729733
"Calendar year contribution made",

0 commit comments

Comments
 (0)