irworkshop
diff --git a/‎state/co/contribs/docs/co_contribs_diary.Rmd
Lines changed: 31 additions & 12 deletions b/‎state/co/contribs/docs/co_contribs_diary.Rmd
Lines changed: 31 additions & 12 deletions
@@ -34,7 +34,7 @@ if (!interactive()) {
 ```
 
 ```{r create_docs_dir, eval=FALSE, echo=FALSE, include=FALSE}
-fs::dir_create(here::here("co", "contribs", "docs"))
+fs::dir_create(here::here("state","co", "contribs", "docs"))
 ```
 
 ## Project
@@ -200,16 +200,19 @@ TRACER also provides a PDF [spreadsheet key][key].
 
 ## Import
 
-We can download the annual ZIP archives directly from TRACER.
+We can download the annual ZIP archives directly from TRACER. This data is extracted from the Department of State database as it existed in the early morning of 10/29/2022. Since the last update already covers records filed before October 1, 2020, we only need the 2020 through 2022 archives.
 
 ```{r raw_dir}
-raw_dir <- dir_create(here("co", "contribs", "data", "raw"))
+raw_dir <- dir_create(here("state","co", "contribs", "data", "raw"))
 raw_base <- "http://tracer.sos.colorado.gov/PublicSite/Docs/BulkDataDownloads/"
-raw_urls <- str_c(raw_base, glue("{2000:2020}_ContributionData.csv.zip"))
+raw_urls <- str_c(raw_base, glue("{2020:2022}_ContributionData.csv.zip"))
 raw_paths <- path(raw_dir, basename(raw_urls))
-if (!all_files_new(raw_dir)) {
+# Download once, and only if some archive is not reported TRUE by this_file_new().
+is_new <- vapply(raw_paths, function(p) isTRUE(this_file_new(p)), logical(1))
+if (!all(is_new)) {
   download.file(raw_urls, raw_paths)
 }
+
 ```
 
 Some slight adjustments need to be made properly read the text files. Every cell
@@ -218,6 +221,7 @@ result parsing errors. We can read the lines of each file and replace these
 with single-quotes.
 
 ```{r raw_fix}
+
 fix_dir <- dir_create(path(dirname(raw_dir), "fix"))
 for (r in raw_paths) {
   f <- path(fix_dir, path_ext_remove(basename(r)))
@@ -264,7 +268,7 @@ These binary variable should be converted to logical.
 ```{r raw_parse}
 old_names <- names(coc)
 coc <- coc %>% 
-  mutate(across(c(amended, amendment), equals, "Y")) %>% 
+  mutate(across(c(amended, amendment), ~ .x == "Y")) %>%
   mutate(across(electioneering, ~!is.na(.))) %>% 
   mutate(across(source, basename)) %>% 
   mutate(across(ends_with("date"), as_date)) %>% 
@@ -286,6 +290,20 @@ glimpse(coc)
 tail(coc)
 ```
 
+```{r, include=FALSE}
+co_prev <- read_csv(dir_ls(raw_dir, regexp = "co_contribs_clean.csv"))
+co_prev$filed_date <- as.Date(co_prev$filed_date, format = "%Y-%m-%d") # assumes ISO dates as written by write_csv(); confirm
+```
+
+We'll look at the filing date range. We'll filter out anything filed before Oct. 1, 2020, since those records were already included in our previous update.
+```{r}
+max(coc$filed_date, na.rm = TRUE)
+min(coc$filed_date, na.rm = TRUE)
+# NOTE: filter() keeps only rows where the test is TRUE, so NA filed_date rows are dropped too.
+coc <- coc %>%
+  filter(filed_date >= as.Date("2020-10-01"))
+```
+
 ### Missing
 
 Columns vary in their degree of missing values.
@@ -313,7 +331,7 @@ coc <- coc %>%
 
 `r percent(mean(coc$na_flag), 0.1)` of records are missing some value.
 
-```{r na_check <- }
+```{r na_check}
 mean(coc$na_flag)
 ```
 
@@ -397,6 +415,7 @@ explore_plot(coc, jurisdiction) + scale_x_truncate()
 #### Amounts
 
 ```{r ammount_summary}
+coc <- coc %>% mutate(amount = as.numeric(amount))
 summary(coc$amount)
 mean(coc$amount <= 0, na.rm = TRUE)
 ```
@@ -414,7 +433,7 @@ coc %>%
   ) +
   labs(
     title = "Colorado Contributions Amount Distribution",
-    subtitle = "from 2000 to 2019",
+    subtitle = "from 2020 to 2022",
     caption = "Source: CO TRACER",
     x = "Amount",
     y = "Count"
@@ -425,7 +444,7 @@ coc %>%
 
 ```{r date_year}
 coc <- mutate(coc, year = year(date))
-coc$year[which(coc$year > 2020 | coc$year < 2000)] <- NA
+#coc$year[which(coc$year > 2020 | coc$year < 2000)] <- NA
 ```
 
 ```{r date_range}
@@ -443,7 +462,7 @@ coc %>%
   geom_col(aes(fill = even)) + 
   scale_fill_brewer(palette = "Dark2") +
   scale_y_continuous(labels = comma) +
-  scale_x_continuous(breaks = seq(2000, 2020, by = 2)) +
+  scale_x_continuous(breaks = seq(min(year(coc$date), na.rm = TRUE), max(year(coc$date), na.rm = TRUE))) +
   theme(legend.position = "bottom") +
   labs(
     title = "Colorado Contributions by Year",
@@ -752,8 +771,8 @@ glimpse(sample_frac(coc))
 Now the file can be saved on disk for upload to the Accountability server.
 
 ```{r clean_dir}
-clean_dir <- dir_create(here("co", "contribs", "data", "clean"))
-clean_path <- path(clean_dir, "co_contribs_clean.csv")
+clean_dir <- dir_create(here("state","co", "contribs", "data", "clean"))
+clean_path <- path(clean_dir, "co_contribs_clean_20201001-20221028.csv")
 write_csv(coc, clean_path, na = "")
 file_size(clean_path)
 file_encoding(clean_path) %>%