diff --git a/.gitignore b/.gitignore index 20c351e..00a6346 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ *.Rproj *.ini tests/* -R/my_custom_fun.R \ No newline at end of file +R/my_custom_fun.R diff --git a/R/standartox.R b/R/standartox.R index 5bf2298..5fcb0f1 100644 --- a/R/standartox.R +++ b/R/standartox.R @@ -373,7 +373,7 @@ stx_query = function( # Split up list object total_entries = nrow(stxDb$test_fin) tox.dt = stxDb$test_fin # final output object. LARGE right after import! - suppressWarnings( tox.dt[, casnr := NULL] ) # HOT FIX! + suppressWarnings( tox.dt[, cl_id := NULL] ) # HOT FIX! stxDb = stxDb[stx_table[-1]] # dump the largest object! <- hope to save some memory with that. # First quick filter steps: @@ -410,9 +410,11 @@ stx_query = function( if( nrow(stxDb$phch) == 0 ) { warning("No query matches found for the provided CAS numbers. Please check the input values.") } - merge(stxDb$phch, tox.dt, all.x = TRUE, by = "cl_id") -> tox.dt + #merge(stxDb$phch, tox.dt, all.x = TRUE, by = "cl_id") -> tox.dt + merge(stxDb$phch, tox.dt, all.x = TRUE, by = "casnr") -> tox.dt } else { - merge(stxDb$phch, tox.dt, all.y = TRUE, by = "cl_id") -> tox.dt + #merge(stxDb$phch, tox.dt, all.y = TRUE, by = "cl_id") -> tox.dt + merge(stxDb$phch, tox.dt, all.y = TRUE, by = "casnr") -> tox.dt } suppressWarnings( tox.dt[, c("chem_class","casnr", "cl_id") := NULL] ) # don't need the cl_id column anymore. diff --git a/README.Rmd b/README.Rmd index e0b94d2..d427458 100644 --- a/README.Rmd +++ b/README.Rmd @@ -12,13 +12,9 @@ Standartox is a database and tool facilitating the retrieval of ecotoxicological ## Installation -**_NOTE:_** Currently v0.0.2 is only available here on GitHub. 
- ```{r eval=FALSE} -# install.packages('standartox') # Currently only available on GitHub -if (!requireNamespace("standartox", quietly = TRUE)) { - remotes::install_github('andschar/standartox') # development version -} +# install.packages('standartox') # CRAN Server version +remotes::install_github('andschar/standartox') # Github version ``` ## Functions @@ -81,33 +77,18 @@ We subset the retrieved data to the 20 most tested chemicals and plot the result ```{r warning=FALSE, message=FALSE} cas20 = oncor[ , .N, cas ][ order(-N) ][1:20] oncor20 = oncor[ cas %in% cas20$cas ] -# add new column which combines chem_name & cas -oncor20[ , chem_name := paste0(chem_name, ' [CAS: ', cas, ']') ] -gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, chem_name, tax_genus)] +# add new column which combines common_name & cas +oncor20[ , new_chem_name := paste0(common_name, ' [CAS: ', cas, ']') ] +gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, new_chem_name, tax_genus)] ``` ```{r warning=FALSE, message=FALSE, fig.width=9, fig.height=6, dpi=300} require(ggplot2) -# ggplot(oncor20, aes(y = cas)) + -# geom_point(aes(x = concentration, col = 'All values'), -# pch = 1, alpha = 0.3) + -# geom_point(data = gmn_dt, -# aes(y = reorder(cas, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'), -# size = 3) + -# scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000), -# labels = c(0.01, 0.1, 1, 10, 100, 1000, 10000)) + -# scale_color_viridis_d(name = '') + -# labs(title = 'LC50 values for Genus: Oncorhynchus', -# subtitle = '20 most tested chemicals', -# x = 'Concentration [g/L]') + -# theme_minimal() + -# theme(axis.title.y = element_blank()) - -ggplot(oncor20, aes(y = chem_name)) + +ggplot(oncor20, aes(y = new_chem_name)) + geom_point(aes(x = concentration, col = 'All values'), pch = 1, alpha = 0.3) + geom_point(data = gmn_dt, - aes(y = reorder(chem_name, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'), 
+ aes(y = reorder(new_chem_name, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'), size = 3) + scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000), labels = c(0.01, 0.1, 1, 10, 100, 1000, 10000)) + diff --git a/README.html b/README.html index c7506d5..b0bed91 100644 --- a/README.html +++ b/README.html @@ -603,7 +603,7 @@

Standartox

CRAN -Downloads

+Downloads

Standartox is a database and tool facilitating the retrieval of ecotoxicological test data. It is based on the EPA ECOTOX database as well as on data from several other chemical databases and allows users to filter @@ -614,12 +614,8 @@

Standartox

SSD - Species Sensitivity Distributions to assess environmental toxicity of chemicals.

Installation

-

NOTE: Currently v0.0.2 is only available -here on GitHub.

-
# install.packages('standartox') # Currently only available on GitHub
-if (!requireNamespace("standartox", quietly = TRUE)) {
-  remotes::install_github('andschar/standartox') # development version
-}
+
# install.packages('standartox') # CRAN Server version
+remotes::install_github('andschar/standartox') # Github version

Functions

Standartox mainly consists of the functions stx_catalog() and stx_query(). The former @@ -636,16 +632,16 @@

stx_catalog()

require(data.table) catal = stx_catalog() names(catal) -
##  [1] "date_compiled"           "standartox_version"      "cas"                     "chem_class"             
-##  [5] "cname"                   "ref_author"              "ref_number"              "ref_title"              
-##  [9] "ref_year"                "class"                   "continent"               "family"                 
-## [13] "genus"                   "group"                   "habitat"                 "order"                  
-## [17] "rank"                    "taxon"                   "casnr"                   "cl_id"                  
-## [21] "concentration"           "concentration_orig"      "concentration_type"      "concentration_unit"     
-## [25] "concentration_unit_orig" "duration"                "duration_orig"           "duration_unit"          
-## [29] "duration_unit_orig"      "effect"                  "endpoint"                "endpoint_group"         
-## [33] "exposure"                "qualifier"               "ref_number"              "result_id"              
-## [37] "tl_id"
+
##  [1] "cas"                     "casnr"                   "chem_class"              "chem_name"              
+##  [5] "ref_author"              "ref_number"              "ref_title"               "ref_year"               
+##  [9] "tax_class"               "tax_continent"           "tax_family"              "tax_genus"              
+## [13] "tax_group"               "tax_habitat"             "tax_order"               "tax_rank"               
+## [17] "tax_taxon"               "casnr"                   "cl_id"                   "concentration"          
+## [21] "concentration_orig"      "concentration_type"      "concentration_unit"      "concentration_unit_orig"
+## [25] "duration"                "duration_orig"           "duration_unit"           "duration_unit_orig"     
+## [29] "effect"                  "endpoint"                "endpoint_group"          "exposure"               
+## [33] "measurement"             "organism_lifestage"      "qualifier"               "ref_number"             
+## [37] "result_id"               "tl_id"
catal$endpoint # access the parameter top five endpoints

Showing the top 10 endpoint values from stx_catalog()

@@ -658,43 +654,43 @@

stx_catalog()

-202306 +205080 NOEL -191672 +196597 NR -162103 +165693 LOEL -152748 +165146 NOEC -135906 +143740 LC50 -113089 +123889 LOEC -53417 +56362 EC50 -22027 +22496 BCF -17337 +17739 NR-LETH -16179 +16213 LD50 @@ -712,16 +708,13 @@

stx_query()

## Reading in Standartox Data ... -## fstcore package v0.10.0 - -## (OpenMP detected, using 8 threads) - ## Removing rows with 'NR' (not reported) for endpoint & duration_unit ... ## Appending chemical information ... ## Appending taxonomic information ... +## Query returned 801451 results out of 1212683 total entries. ## Done!

Example: Oncorhynchus

Let’s say, we want to retrieve the 20 most tested chemicals on the @@ -751,20 +744,21 @@

Example: Oncorhynchus

## Appending taxonomic information ... +## Query returned 3178 results out of 1212683 total entries. ## Done!

We subset the retrieved data to the 20 most tested chemicals and plot the result.

cas20 = oncor[ , .N, cas ][ order(-N) ][1:20]
 oncor20 = oncor[ cas %in% cas20$cas ]
-# add new column which combines cname & cas
-oncor20[ , cname := paste0(cname, ' [CAS: ', cas, ']') ]
-gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, cname, tax_genus)]
+# add new column which combines common_name & cas +oncor20[ , new_chem_name := paste0(common_name, ' [CAS: ', cas, ']') ] +gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, new_chem_name, tax_genus)]
require(ggplot2)
-ggplot(oncor20, aes(y = cname)) +
+ggplot(oncor20, aes(y = new_chem_name)) +
   geom_point(aes(x = concentration, col = 'All values'),
              pch = 1, alpha = 0.3) +
   geom_point(data = gmn_dt,
-             aes(y = reorder(cname, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'),
+             aes(y = reorder(new_chem_name, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'),
              size = 3) +
   scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000),
                 labels = c(0.01, 0.1, 1, 10, 100, 1000, 10000)) +
@@ -774,8 +768,7 @@ 

Example: Oncorhynchus

x = 'Concentration [g/L]') + theme_minimal() + theme(axis.title.y = element_blank())
-

-
# Antimycin A (CAS 1397-94-0) listed as NA in standartox! Need to check this ... 
+

Article

The article on Standartox is published here.

Contributors

diff --git a/README.md b/README.md index 3a5b6b4..8d200ff 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,9 @@ environmental toxicity of chemicals. ## Installation -***NOTE:*** Currently v0.0.2 is only available here on GitHub. - ``` r -# install.packages('standartox') # Currently only available on GitHub -if (!requireNamespace("standartox", quietly = TRUE)) { - remotes::install_github('andschar/standartox') # development version -} +# install.packages('standartox') # CRAN Server version +remotes::install_github('andschar/standartox') # Github version ``` ## Functions @@ -48,16 +44,16 @@ catal = stx_catalog() names(catal) ``` - ## [1] "date_compiled" "standartox_version" "cas" "chem_class" - ## [5] "cname" "ref_author" "ref_number" "ref_title" - ## [9] "ref_year" "class" "continent" "family" - ## [13] "genus" "group" "habitat" "order" - ## [17] "rank" "taxon" "casnr" "cl_id" - ## [21] "concentration" "concentration_orig" "concentration_type" "concentration_unit" - ## [25] "concentration_unit_orig" "duration" "duration_orig" "duration_unit" - ## [29] "duration_unit_orig" "effect" "endpoint" "endpoint_group" - ## [33] "exposure" "qualifier" "ref_number" "result_id" - ## [37] "tl_id" + ## [1] "cas" "casnr" "chem_class" "chem_name" + ## [5] "ref_author" "ref_number" "ref_title" "ref_year" + ## [9] "tax_class" "tax_continent" "tax_family" "tax_genus" + ## [13] "tax_group" "tax_habitat" "tax_order" "tax_rank" + ## [17] "tax_taxon" "casnr" "cl_id" "concentration" + ## [21] "concentration_orig" "concentration_type" "concentration_unit" "concentration_unit_orig" + ## [25] "duration" "duration_orig" "duration_unit" "duration_unit_orig" + ## [29] "effect" "endpoint" "endpoint_group" "exposure" + ## [33] "measurement" "organism_lifestage" "qualifier" "ref_number" + ## [37] "result_id" "tl_id" ``` r catal$endpoint # access the parameter top five endpoints @@ -67,16 +63,16 @@ Showing the top 10 endpoint values from `stx_catalog()` | n | variable | |-------:|:---------| -| 
202306 | NOEL | -| 191672 | NR | -| 162103 | LOEL | -| 152748 | NOEC | -| 135906 | LC50 | -| 113089 | LOEC | -| 53417 | EC50 | -| 22027 | BCF | -| 17337 | NR-LETH | -| 16179 | LD50 | +| 205080 | NOEL | +| 196597 | NR | +| 165693 | LOEL | +| 165146 | NOEC | +| 143740 | LC50 | +| 123889 | LOEC | +| 56362 | EC50 | +| 22496 | BCF | +| 17739 | NR-LETH | +| 16213 | LD50 | ### `stx_query()` @@ -93,16 +89,13 @@ more detail. ## Reading in Standartox Data ... - ## fstcore package v0.10.0 - - ## (OpenMP detected, using 8 threads) - ## Removing rows with 'NR' (not reported) for endpoint & duration_unit ... ## Appending chemical information ... ## Appending taxonomic information ... + ## Query returned 801451 results out of 1212683 total entries. ## Done! ## Example: *Oncorhynchus* @@ -138,6 +131,7 @@ oncor = stx_query( ## Appending taxonomic information ... + ## Query returned 3178 results out of 1212683 total entries. ## Done! We subset the retrieved data to the 20 most tested chemicals and plot @@ -146,18 +140,18 @@ the result. 
``` r cas20 = oncor[ , .N, cas ][ order(-N) ][1:20] oncor20 = oncor[ cas %in% cas20$cas ] -# add new column which combines cname & cas -oncor20[ , cname := paste0(cname, ' [CAS: ', cas, ']') ] -gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, cname, tax_genus)] +# add new column which combines common_name & cas +oncor20[ , new_chem_name := paste0(common_name, ' [CAS: ', cas, ']') ] +gmn_dt = oncor20[ , .(gmn = exp(mean(log(concentration), na.rm = TRUE))), .(cas, new_chem_name, tax_genus)] ``` ``` r require(ggplot2) -ggplot(oncor20, aes(y = cname)) + +ggplot(oncor20, aes(y = new_chem_name)) + geom_point(aes(x = concentration, col = 'All values'), pch = 1, alpha = 0.3) + geom_point(data = gmn_dt, - aes(y = reorder(cname, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'), + aes(y = reorder(new_chem_name, -gmn), x = gmn, col = 'Standartox value\n(Geometric mean)'), size = 3) + scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000), labels = c(0.01, 0.1, 1, 10, 100, 1000, 10000)) + @@ -171,10 +165,6 @@ ggplot(oncor20, aes(y = cname)) + ![](README_files/figure-gfm/unnamed-chunk-8-1.png) -``` r -# Antimycin A (CAS 1397-94-0) listed as NA in standartox! Need to check this ... 
-``` - ## Article The article on Standartox is published diff --git a/README_files/figure-gfm/unnamed-chunk-12-1.png b/README_files/figure-gfm/unnamed-chunk-12-1.png deleted file mode 100644 index b4b4cde..0000000 Binary files a/README_files/figure-gfm/unnamed-chunk-12-1.png and /dev/null differ diff --git a/README_files/figure-gfm/unnamed-chunk-13-1.png b/README_files/figure-gfm/unnamed-chunk-13-1.png deleted file mode 100644 index 40727d9..0000000 Binary files a/README_files/figure-gfm/unnamed-chunk-13-1.png and /dev/null differ diff --git a/README_files/figure-gfm/unnamed-chunk-8-1.png b/README_files/figure-gfm/unnamed-chunk-8-1.png index f6006c8..d1f1403 100644 Binary files a/README_files/figure-gfm/unnamed-chunk-8-1.png and b/README_files/figure-gfm/unnamed-chunk-8-1.png differ