diff --git a/.gitignore b/.gitignore index 20c351e..00a6346 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ *.Rproj *.ini tests/* -R/my_custom_fun.R \ No newline at end of file +R/my_custom_fun.R diff --git a/R/standartox.R b/R/standartox.R index 5bf2298..5fcb0f1 100644 --- a/R/standartox.R +++ b/R/standartox.R @@ -373,7 +373,7 @@ stx_query = function( # Split up list object total_entries = nrow(stxDb$test_fin) tox.dt = stxDb$test_fin # final output object. LARGE right after import! - suppressWarnings( tox.dt[, casnr := NULL] ) # HOT FIX! + suppressWarnings( tox.dt[, cl_id := NULL] ) # HOT FIX! stxDb = stxDb[stx_table[-1]] # dump the largest object! <- hope to save some memory with that. # First quick filter steps: @@ -410,9 +410,11 @@ stx_query = function( if( nrow(stxDb$phch) == 0 ) { warning("No query matches found for the provided CAS numbers. Please check the input values.") } - merge(stxDb$phch, tox.dt, all.x = TRUE, by = "cl_id") -> tox.dt + #merge(stxDb$phch, tox.dt, all.x = TRUE, by = "cl_id") -> tox.dt + merge(stxDb$phch, tox.dt, all.x = TRUE, by = "casnr") -> tox.dt } else { - merge(stxDb$phch, tox.dt, all.y = TRUE, by = "cl_id") -> tox.dt + #merge(stxDb$phch, tox.dt, all.y = TRUE, by = "cl_id") -> tox.dt + merge(stxDb$phch, tox.dt, all.y = TRUE, by = "casnr") -> tox.dt } suppressWarnings( tox.dt[, c("chem_class","casnr", "cl_id") := NULL] ) # don't need the cl_id column anymore. diff --git a/README.Rmd b/README.Rmd index e0b94d2..d427458 100644 --- a/README.Rmd +++ b/README.Rmd @@ -12,13 +12,9 @@ Standartox is a database and tool facilitating the retrieval of ecotoxicological ## Installation -**_NOTE:_** Currently v0.0.2 is only available here on GitHub. 
- ```{r eval=FALSE} -# install.packages('standartox') # Currently only available on GitHub -if (!requireNamespace("standartox", quietly = TRUE)) { - remotes::install_github('andschar/standartox') # development version -} +# install.packages('standartox') # CRAN Server version +remotes::install_github('andschar/standartox') # Github version ``` ## Functions @@ -81,33 +77,18 @@ We subset the retrieved data to the 20 most tested chemicals and plot the result ```{r warning=FALSE, message=FALSE} cas20 = oncor[ , .N, cas ][ order(-N) ][1:20] oncor20 = oncor[ cas %in% cas20$cas ] -# add new column which combines chem_name & cas -oncor20[ , chem_name := paste0(chem_name, ' [CAS: ', cas, ']') ] -gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, chem_name, tax_genus)] +# add new column which combines common_name & cas +oncor20[ , new_chem_name := paste0(common_name, ' [CAS: ', cas, ']') ] +gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, new_chem_name, tax_genus)] ``` ```{r warning=FALSE, message=FALSE, fig.width=9, fig.height=6, dpi=300} require(ggplot2) -# ggplot(oncor20, aes(y = cas)) + -# geom_point(aes(x = concentration, col = 'All values'), -# pch = 1, alpha = 0.3) + -# geom_point(data = gmn_dt, -# aes(y = reorder(cas, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'), -# size = 3) + -# scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000), -# labels = c(0.01, 0.1, 1, 10, 100, 1000, 10000)) + -# scale_color_viridis_d(name = '') + -# labs(title = 'LC50 values for Genus: Oncorhynchus', -# subtitle = '20 most tested chemicals', -# x = 'Concentration [g/L]') + -# theme_minimal() + -# theme(axis.title.y = element_blank()) - -ggplot(oncor20, aes(y = chem_name)) + +ggplot(oncor20, aes(y = new_chem_name)) + geom_point(aes(x = concentration, col = 'All values'), pch = 1, alpha = 0.3) + geom_point(data = gmn_dt, - aes(y = reorder(chem_name, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'), 
+ aes(y = reorder(new_chem_name, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'), size = 3) + scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000), labels = c(0.01, 0.1, 1, 10, 100, 1000, 10000)) + diff --git a/README.html b/README.html index c7506d5..b0bed91 100644 --- a/README.html +++ b/README.html @@ -603,7 +603,7 @@
Standartox is a database and tool facilitating the retrieval of ecotoxicological test data. It is based on the EPA ECOTOX database as well as on data from several other chemical databases and allows users to filter
@@ -614,12 +614,8 @@
-NOTE: Currently v0.0.2 is only available
-here on GitHub.
-# install.packages('standartox') # Currently only available on GitHub
-if (!requireNamespace("standartox", quietly = TRUE)) {
- remotes::install_github('andschar/standartox') # development version
-}
+# install.packages('standartox') # CRAN Server version
+remotes::install_github('andschar/standartox') # Github version
Standartox mainly consists of the functions
stx_catalog() and stx_query(). The former
@@ -636,16 +632,16 @@
stx_catalog()
-## [1] "date_compiled" "standartox_version" "cas" "chem_class"
-## [5] "cname" "ref_author" "ref_number" "ref_title"
-## [9] "ref_year" "class" "continent" "family"
-## [13] "genus" "group" "habitat" "order"
-## [17] "rank" "taxon" "casnr" "cl_id"
-## [21] "concentration" "concentration_orig" "concentration_type" "concentration_unit"
-## [25] "concentration_unit_orig" "duration" "duration_orig" "duration_unit"
-## [29] "duration_unit_orig" "effect" "endpoint" "endpoint_group"
-## [33] "exposure" "qualifier" "ref_number" "result_id"
-## [37] "tl_id"
+## [1] "cas" "casnr" "chem_class" "chem_name"
+## [5] "ref_author" "ref_number" "ref_title" "ref_year"
+## [9] "tax_class" "tax_continent" "tax_family" "tax_genus"
+## [13] "tax_group" "tax_habitat" "tax_order" "tax_rank"
+## [17] "tax_taxon" "casnr" "cl_id" "concentration"
+## [21] "concentration_orig" "concentration_type" "concentration_unit" "concentration_unit_orig"
+## [25] "duration" "duration_orig" "duration_unit" "duration_unit_orig"
+## [29] "effect" "endpoint" "endpoint_group" "exposure"
+## [33] "measurement" "organism_lifestage" "qualifier" "ref_number"
+## [37] "result_id" "tl_id"
Showing the top 10 endpoint values from
stx_catalog()
stx_catalog()
stx_query()
Let’s say, we want to retrieve the 20 most tested chemicals on the
@@ -751,20 +744,21 @@
We subset the retrieved data to the 20 most tested chemicals and plot the result.
cas20 = oncor[ , .N, cas ][ order(-N) ][1:20]
oncor20 = oncor[ cas %in% cas20$cas ]
-# add new column which combines cname & cas
-oncor20[ , cname := paste0(cname, ' [CAS: ', cas, ']') ]
-gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, cname, tax_genus)]
require(ggplot2)
-ggplot(oncor20, aes(y = cname)) +
+ggplot(oncor20, aes(y = new_chem_name)) +
geom_point(aes(x = concentration, col = 'All values'),
pch = 1, alpha = 0.3) +
geom_point(data = gmn_dt,
- aes(y = reorder(cname, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'),
+ aes(y = reorder(new_chem_name, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'),
size = 3) +
scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000),
labels = c(0.01, 0.1, 1, 10, 100, 1000, 10000)) +
@@ -774,8 +768,7 @@ Example: Oncorhynchus
x = 'Concentration [g/L]') +
theme_minimal() +
theme(axis.title.y = element_blank())
The article on Standartox is published here.