From ec8af72205126027888d79ffc3838144089d764b Mon Sep 17 00:00:00 2001 From: Lishan Gao Date: Mon, 18 Sep 2023 14:45:12 -0500 Subject: [PATCH] Lishan --- .DS_Store | Bin 6148 -> 8196 bytes submissions/Final Exercise.R | 48 +++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 submissions/Final Exercise.R diff --git a/.DS_Store b/.DS_Store index fcf486dbb9ada5725243a6aa0c44d7c8ca46b34e..0d346ef2fbe8b1207f6b97230f1b571edabd2b40 100644 GIT binary patch delta 800 zcmb`FL2DCH5XWb;o6QSJF~v3p8cW1$+-|m}B3{~P4n2s{EqYK{-gZM>yM1p-HVrj| zUV5_h5FTDyQAG4Ar5Evo_*F`KQ0Hw^NEW;~%VXv-znOXaXZFqgCQrE!+j$brRf(aHZf&pt- z*~aXf)qrof0e9sPJS%vNKe1s1pn->TBA^NG`@gtgvu2C3wsmhRuH1zhI5>wv0J}K% zE%*>l_EK(+_Y%LgVmSdEBmr@xeM>^;6VTIiSuCf2CZN>j1jO`tagqLNWKoVL{`w<$ zjNU+5y~!8EvV0uLB98UBxkO&dF`nOZd!2A3>kl^!{7z(=zm$}!rS%NSkQ{kszvewV zqEWZgpzRm(w$6FrUd7aX=C^uIVdf!c5%pQC9T)gs3z3)4edh6AgLj$NiEHi41j&-D zQz-SD{kmyYD^|Tes8;&-Ewfs=ySOwMWXbI9g$HZSRng8LNP)W~1C1MxvW z6`%AQ`5n8>0?wp*$_Z6d)9NKt%As${^~4oCQ^mEJRq-TG2zHd43&P32ul=8~pK0nZ DJ!H@W delta 138 zcmZp1XfcprU|?W$DortDU=RQ@Ie-{Mvv5r;6q~50C@2Dw2Z`mR8wMxm=N4=%T+PfV zGPz2idUB%R&dEVSyC%B{FP$7LB0kwebmqj;9gEpHI0Tu2+JHcS8%Vfx&1}2C_AiEhh$Mei#1^|-V8*2an diff --git a/submissions/Final Exercise.R b/submissions/Final Exercise.R new file mode 100644 index 0000000..42e3965 --- /dev/null +++ b/submissions/Final Exercise.R @@ -0,0 +1,48 @@ +# Final Bootcamp Project + +# Task 1. import dataseet: +nys_schools <- read.csv("nys_schools.csv", stringsAsFactors=FALSE) +nys_acs <- read.csv("nys_acs.csv", stringsAsFactors=FALSE) + +# Tasl 2. 
# Task 2. Explore data ----
# `nys_schools` and `nys_acs` are the data frames read from CSV in Task 1.
# dplyr supplies %>%, group_by(), mutate(), filter(); ggplot2 supplies the
# plotting functions used further down.
library(dplyr)
library(ggplot2)

summary(nys_schools)
summary(nys_acs)

# Task 3.1 Handle missing data ----
# Missing values are coded as -99. Count them in each data set; na.rm = TRUE
# keeps a genuine NA from turning the whole count into NA.
sum(nys_schools == -99, na.rm = TRUE)
sum(nys_acs == -99, na.rm = TRUE)

# Drop every school row that contains at least one -99.
# The comparison is done on the data frame directly instead of through
# apply(nys_schools, 1, ...): apply() converts the data frame to a matrix, and
# when text columns are present the numeric cells become formatted strings
# (e.g. "-99.00"), which no longer compare equal to -99.
# A logical mask is used instead of -which(mask): with no flagged rows,
# which() returns integer(0) and negative indexing would then drop ALL rows.
has_missing <- rowSums(nys_schools == -99, na.rm = TRUE) > 0
nys_schools <- nys_schools[!has_missing, ]

# Task 3.2 Poverty groups ----
# Classify each county-year as "Low", "Medium" or "High" poverty.
# NOTE(review): the two cutoffs are hard-coded; presumably they were read off
# the summary(nys_acs) output above -- confirm, and recompute if the data
# changes.
poverty_low_cutoff <- 0.10903
poverty_high_cutoff <- 0.14929

# Boundaries: <= low cutoff is "Low", >= high cutoff is "High", everything
# strictly in between is "Medium". NA poverty rates stay NA.
nys_acs$rank_poverty <- ifelse(
  nys_acs$county_per_poverty <= poverty_low_cutoff,
  "Low",
  ifelse(nys_acs$county_per_poverty >= poverty_high_cutoff, "High", "Medium")
)

# Task 3.3 Standardised test scores ----
# Z-scores are computed within each year so that scores are compared against
# that year's mean and spread only. ungroup() afterwards so later steps do not
# silently inherit the grouping.
zscore <- nys_schools %>%
  group_by(year) %>%
  mutate(
    zscore_math = (mean_math_score - mean(mean_math_score, na.rm = TRUE)) /
      sd(mean_math_score, na.rm = TRUE),
    zscore_ela = (mean_ela_score - mean(mean_ela_score, na.rm = TRUE)) /
      sd(mean_ela_score, na.rm = TRUE)
  ) %>%
  ungroup()

# Task 4. Merge datasets ----
# Left join from the county table: every nys_acs row is kept (all.x = TRUE)
# and matched to the school rows of the same county and year.
merged_df <- merge(
  nys_acs,
  zscore,
  by = c("county_name", "year"),
  all.x = TRUE
)
# Show only the first rows; printing the full merged table floods the console.
head(merged_df)

# Task 5.1 ----
# NOTE(review): `long_merged_energy` (columns datetime, output, source) is
# never created in this script, and the titles refer to energy output rather
# than the NYS school data -- this section looks pasted from another exercise.
# It is guarded so the script runs to completion; replace it with the intended
# Task 5 analysis.
if (exists("long_merged_energy")) {
  # Line plot of output over time, one line per energy source.
  # print() is required because ggplot objects do not auto-print inside `if`.
  energy_output_plot <- long_merged_energy %>%
    ggplot() +
    geom_line(aes(x = datetime, y = output, group = source, col = source)) +
    labs(
      title = "Output by energy source over time",
      subtitle = "Hourly data from September 3-9, 2018",
      x = "Hour",
      y = "Output (MW)"
    )
  print(energy_output_plot)

  # Create a line plot that compares generation of wind, solar, and
  # geothermal energy over time.
  # Bonus: Set the line size to 1.5.
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept for compatibility with older installs -- confirm
  # the ggplot2 version in use.
  renewable_plot <- long_merged_energy %>%
    filter(source %in% c("wind", "solar", "geothermal")) %>%
    ggplot() +
    geom_line(
      aes(x = datetime, y = output, group = source, col = source),
      size = 1.5
    ) +
    labs(
      title = "Wind vs. Solar vs. Geothermal generation",
      subtitle = "Hourly data from September 3-9, 2018",
      x = "Hour",
      y = "Output (MW)"
    )
  print(renewable_plot)
} else {
  warning(
    "Task 5.1 skipped: `long_merged_energy` is not defined in this script.",
    call. = FALSE
  )
}