@@ -34,7 +34,7 @@ if (!interactive()) {
34
34
```
35
35
36
36
``` {r create_docs_dir, eval=FALSE, echo=FALSE, include=FALSE}
37
- fs::dir_create(here::here("co", "contribs", "docs"))
37
+ fs::dir_create(here::here("state"," co", "contribs", "docs"))
38
38
```
39
39
40
40
## Project
@@ -200,16 +200,19 @@ TRACER also provides a PDF [spreadsheet key][key].
200
200
201
201
## Import
202
202
203
- We can download the annual ZIP archives directly from TRACER.
203
+ We can download the annual ZIP archives directly from TRACER. This data reflects the Department of State database as it existed in the early morning of 10/29/2022. Since the last update took place in 2020, we only need the annual files for 2020 through 2022.
204
204
205
205
``` {r raw_dir}
206
- raw_dir <- dir_create(here("co", "contribs", "data", "raw"))
206
+ raw_dir <- dir_create(here("state"," co", "contribs", "data", "raw"))
207
207
raw_base <- "http://tracer.sos.colorado.gov/PublicSite/Docs/BulkDataDownloads/"
208
- raw_urls <- str_c(raw_base, glue("{2000: 2020}_ContributionData.csv.zip"))
208
+ raw_urls <- str_c(raw_base, glue("{2020:2022 }_ContributionData.csv.zip"))
209
209
raw_paths <- path(raw_dir, basename(raw_urls))
210
- if (!all_files_new(raw_dir)) {
210
+ for (f in raw_paths) {
211
+ if (!this_file_new(f)) {
211
212
download.file(raw_urls[match(f, raw_paths)], f) # fetch only the URL paired with this stale file, not the whole vector
213
+ }
212
214
}
215
+
213
216
```
214
217
215
218
Some slight adjustments need to be made to properly read the text files. Every cell
@@ -218,6 +221,7 @@ result parsing errors. We can read the lines of each file and replace these
218
221
with single-quotes.
219
222
220
223
``` {r raw_fix}
224
+
221
225
fix_dir <- dir_create(path(dirname(raw_dir), "fix"))
222
226
for (r in raw_paths) {
223
227
f <- path(fix_dir, path_ext_remove(basename(r)))
@@ -264,7 +268,7 @@ These binary variable should be converted to logical.
264
268
``` {r raw_parse}
265
269
old_names <- names(coc)
266
270
coc <- coc %>%
267
- mutate(across(c(amended, amendment), equals, "Y")) %>%
271
+ mutate(across(c(amended, amendment), .fns = magrittr:: equals, "Y")) %>%
268
272
mutate(across(electioneering, ~!is.na(.))) %>%
269
273
mutate(across(source, basename)) %>%
270
274
mutate(across(ends_with("date"), as_date)) %>%
@@ -286,6 +290,20 @@ glimpse(coc)
286
290
tail(coc)
287
291
```
288
292
293
+ ``` {r, include=FALSE}
294
+ co_prev <- read_csv(dir_ls(raw_dir,regexp = "co_contribs_clean.csv"))
295
+ co_prev$filed_date <- co_prev$filed_date %>% as.Date(format = "%Y-%d-%m") # NOTE(review): format is year-day-month; confirm the file is not ISO %Y-%m-%d
296
+ ```
297
+
298
+ We'll look at the filing date range. We'll filter out anything filed before Oct 1, 2020 as those records are already in our previous update.
299
+ ``` {r}
300
+ max(coc$filed_date)
301
+ min(coc$filed_date)
302
+
303
+ coc <- coc %>%
304
+ filter(filed_date >= as.Date("2020-10-01"))
305
+ ```
306
+
289
307
### Missing
290
308
291
309
Columns vary in their degree of missing values.
@@ -313,7 +331,7 @@ coc <- coc %>%
313
331
314
332
` r percent(mean(coc$na_flag), 0.1) ` of records are missing some value.
315
333
316
- ``` {r na_check <- }
334
+ ``` {r na_check}
317
335
mean(coc$na_flag)
318
336
```
319
337
@@ -397,6 +415,7 @@ explore_plot(coc, jurisdiction) + scale_x_truncate()
397
415
#### Amounts
398
416
399
417
``` {r ammount_summary}
418
+ coc <- coc %>% mutate(amount = as.numeric(amount))
400
419
summary(coc$amount)
401
420
mean(coc$amount <= 0, na.rm = TRUE)
402
421
```
@@ -414,7 +433,7 @@ coc %>%
414
433
) +
415
434
labs(
416
435
title = "Colorado Contributions Amount Distribution",
417
- subtitle = "from 2000 to 2019 ",
436
+ subtitle = "from 2020 to 2022 ",
418
437
caption = "Source: CO TRACER",
419
438
x = "Amount",
420
439
y = "Count"
@@ -425,7 +444,7 @@ coc %>%
425
444
426
445
``` {r date_year}
427
446
coc <- mutate(coc, year = year(date))
428
- coc$year[which(coc$year > 2020 | coc$year < 2000)] <- NA
447
+ # coc$year[which(coc$year > 2020 | coc$year < 2000)] <- NA
429
448
```
430
449
431
450
``` {r date_range}
@@ -443,7 +462,7 @@ coc %>%
443
462
geom_col(aes(fill = even)) +
444
463
scale_fill_brewer(palette = "Dark2") +
445
464
scale_y_continuous(labels = comma) +
446
- scale_x_continuous(breaks = seq(2000, 2020, by = 2 )) +
465
+ scale_x_continuous(breaks = seq(min(year(coc$date), na.rm = TRUE), max(year(coc$date), na.rm = TRUE))) +
447
466
theme(legend.position = "bottom") +
448
467
labs(
449
468
title = "Colorado Contributions by Year",
@@ -752,8 +771,8 @@ glimpse(sample_frac(coc))
752
771
Now the file can be saved on disk for upload to the Accountability server.
753
772
754
773
``` {r clean_dir}
755
- clean_dir <- dir_create(here("co", "contribs", "data", "clean"))
756
- clean_path <- path(clean_dir, "co_contribs_clean .csv")
774
+ clean_dir <- dir_create(here("state"," co", "contribs", "data", "clean"))
775
+ clean_path <- path(clean_dir, "co_contribs_clean_20201001-20221028 .csv")
757
776
write_csv(coc, clean_path, na = "")
758
777
file_size(clean_path)
759
778
file_encoding(clean_path) %>%
0 commit comments