From 4e7ba6c2208bd154280a14eda977c4735f8c3cdb Mon Sep 17 00:00:00 2001 From: Hannes Reinwald Date: Wed, 24 Sep 2025 02:54:06 +0200 Subject: [PATCH] NEW stable release. load data.table included now --- DESCRIPTION | 4 +- NAMESPACE | 1 + R/standartox.R | 3 +- man/stx_catalog.Rd | 36 ++++++++----- man/stx_chem.Rd | 35 ++++++++----- man/stx_data.Rd | 29 ++++++++--- man/stx_query.Rd | 124 +++++++++++++++++++++++++++------------------ man/stx_taxa.Rd | 39 +++++++++----- 8 files changed, 168 insertions(+), 103 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7e190c1..b8606f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: standartox -Version: 1.0.1 +Version: 1.0.0 Date: 2025-09-19 Title: Ecotoxicological Information from the Standartox Database Authors@R: c( @@ -24,7 +24,7 @@ Imports: curl (>= 3.4), jsonlite (>= 1.6.1), fst (>= 0.9.4), - data.table (>= 1.13.0) + data.table (>= 1.17.8) License: MIT + file LICENSE URL: https://github.com/andschar/standartox BugReports: https://github.com/andschar/standartox/issues diff --git a/NAMESPACE b/NAMESPACE index 8096a7d..bb85450 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,3 +6,4 @@ export(stx_data) export(stx_meta) export(stx_query) export(stx_taxa) +import(data.table) diff --git a/R/standartox.R b/R/standartox.R index 5fcb0f1..6a6f706 100644 --- a/R/standartox.R +++ b/R/standartox.R @@ -334,6 +334,7 @@ filter_dt.AND <- function(dt, var_ls, silent = TRUE) { #' } #' #' @export +#' @import data.table stx_query = function( ## COMPOUND FILTERING ## cas_number = NULL, @@ -373,7 +374,7 @@ stx_query = function( # Split up list object total_entries = nrow(stxDb$test_fin) tox.dt = stxDb$test_fin # final output object. LARGE right after import! - suppressWarnings( tox.dt[, cl_id := NULL] ) # HOT FIX! + suppressWarnings( tox.dt[, cl_id := NULL] ) # stxDb = stxDb[stx_table[-1]] # dump the largest object! <- hope to save some memory with that. 
# First quick filter steps: diff --git a/man/stx_catalog.Rd b/man/stx_catalog.Rd index 4eb2c36..3017607 100644 --- a/man/stx_catalog.Rd +++ b/man/stx_catalog.Rd @@ -2,35 +2,43 @@ % Please edit documentation in R/standartox.R \name{stx_catalog} \alias{stx_catalog} -\title{Retrieve data catalog} +\title{Retrieve Standartox Data Catalog} \usage{ stx_catalog(silent = FALSE, stx_dir = file.path(tempdir(), "standartox")) } \arguments{ -\item{silent}{logical; If TRUE, suppresses messages. Default is FALSE.} +\item{silent}{logical; If \code{TRUE}, suppresses messages during the download process. Default is \code{FALSE}.} -\item{stx_dir}{character; Directory to which the catalog should be downloaded. Default is a temporary directory.} +\item{stx_dir}{character; Directory where the catalog file is stored. If the file doesn't exist, it will be downloaded to this location. Defaults to a "standartox" subdirectory within the session's temporary directory (\code{tempdir()}).} } \value{ -Returns a list of data.frames containing information on data base variables +Returns a list where each element is a character vector containing the unique values for a specific data field available for querying. } \description{ -Retrieve a data catalog for all variables (and their values) that can be retrieved with stx_query() +Retrieves a catalog of all possible values for variables that can be used for filtering in \code{stx_query()}. This is useful for discovering valid inputs for parameters like \code{endpoint_group}, \code{effect}, or \code{tax_group}. 
} \examples{ \donttest{ -# might fail if there is no internet connection or Zenodo.org not not available -# basic function call -l = stx_catalog() +# This function might fail if there is no internet connection or Zenodo.org is not available -# to get verbose output from the function -l = stx_catalog(silent = FALSE) +# Basic function call to retrieve the catalog +catalog_data <- stx_catalog() -# to specify a directory to which the catalog should be downloaded -l = stx_catalog(silent = FALSE, stx_dir = "~/tmp") -# This will create a directory under ~/tmp and download the catalog.rds file to that directory. -# The files are then permanently stored in that directory and can be directly read when restarting your R session. +# View the names of available fields in the catalog +names(catalog_data) + +# See all possible values for 'endpoint_group' +print(catalog_data$endpoint_group) + +# Get verbose output from the function +catalog_verbose <- stx_catalog(silent = FALSE) + +# Specify a permanent directory to download and cache the catalog file. +# This speeds up future calls as the file won't need to be re-downloaded when starting a new session. +my_dir <- file.path("~","my_stx_data") +catalog_cached <- stx_catalog(stx_dir = my_dir, silent = FALSE) } + } \author{ Andreas Scharmueller \email{andschar@protonmail.com} diff --git a/man/stx_chem.Rd b/man/stx_chem.Rd index 38ed0b5..2e95da2 100644 --- a/man/stx_chem.Rd +++ b/man/stx_chem.Rd @@ -2,34 +2,41 @@ % Please edit documentation in R/standartox.R \name{stx_chem} \alias{stx_chem} -\title{Retrieve chemical data} +\title{Retrieve the Standartox Chemical Properties Table} \usage{ stx_chem(silent = FALSE, stx_dir = file.path(tempdir(), "standartox")) } \arguments{ -\item{silent}{logical; If TRUE, suppresses messages. Default is FALSE.} +\item{silent}{logical; If `TRUE`, suppresses progress messages during download. Defaults to `FALSE`.} -\item{stx_dir}{character; Directory to which the chemical information should be downloaded. 
Default is a temporary directory.} +\item{stx_dir}{character; Directory where the chemical data file is stored or should be downloaded. Defaults to a "standartox" subdirectory within the session's temporary directory (\code{file.path(tempdir(), "standartox")}).} } \value{ -Returns a data.table containing informaiton on chemicals in Standartox. +A \code{data.table} containing chemical information, including columns such as CAS number, chemical name, and other identifiers. } \description{ -Retrieve data on all chemicals in Standartox. +Provides direct access to the \code{phch} data table, which contains chemical identifiers and physicochemical properties for all compounds in the Standartox database. +} +\details{ +This function is a simple wrapper for \code{stx_download(data_type = 'phch')}. It is designed for users who want to explore the chemical inventory of the database, for instance, to find CAS numbers for use with \code{\link{stx_query}}. + +Like other download functions in this package, it caches the data file locally to avoid re-downloading on subsequent calls. } \examples{ \donttest{ -# might fail if there is no internet connection or Zenodo.org not not available -# basic function call -df = stx_chem() +# This function may fail if there is no internet connection or Zenodo.org is not available + +# Basic function call to retrieve the chemical data table +chem_data <- stx_chem() -# to get verbose output from the function -df = stx_chem(silent = FALSE) +# Inspect the first few rows and column names to see what's available +head(chem_data) +colnames(chem_data) -# to specify a directory to which the chemical information should be downloaded -df = stx_chem(silent = FALSE, stx_dir = "~/tmp") -# This will create a directory under ~/tmp and download the respective standartox file to that directory. -# The files are then permanently stored in that directory and can be directly read when restarting your R session. 
+# Example of using a permanent directory to cache the data file +# The "Retrieving..." message will only appear on the first download. +my_dir <- file.path("~", "my_stx_data") +cached_chem_data <- stx_chem(stx_dir = my_dir, silent = FALSE) } } diff --git a/man/stx_data.Rd b/man/stx_data.Rd index c29560e..398464e 100644 --- a/man/stx_data.Rd +++ b/man/stx_data.Rd @@ -2,28 +2,41 @@ % Please edit documentation in R/standartox.R \name{stx_data} \alias{stx_data} -\title{Retrieve Standartox toxicity values} +\title{Retrieve the Core Standartox Toxicity Data Table} \usage{ stx_data(silent = FALSE, stx_dir = file.path(tempdir(), "standartox")) } \arguments{ -\item{silent}{logical; If TRUE, suppresses messages. Default is FALSE.} +\item{silent}{logical; If \code{TRUE}, suppresses progress messages during download. Defaults to \code{FALSE}.} -\item{stx_dir}{character; Directory to which the catalog should be downloaded. Default is a temporary directory.} +\item{stx_dir}{character; Directory where the data file is stored or should be downloaded. Defaults to a "standartox" subdirectory within the session's temporary directory (\code{file.path(tempdir(), "standartox")}).} } \value{ -Returns a data.table. +A \code{data.table} containing the core toxicity test results, with columns for endpoints, duration, concentration, effect, etc. } \description{ -Retrieve a data.table containing the Standartox toxicity data +Provides direct access to the main \code{test_fin} data table from the Standartox database, which contains the raw toxicity results. +} +\details{ +This function is a simple wrapper for \code{stx_download(data_type = 'test_fin')}. It is designed for users who want the primary data table of toxicity test results without the additional chemical or detailed taxonomic information that \code{\link{stx_query}} automatically appends. + +Like other download functions in this package, it caches the data file locally to avoid re-downloading on subsequent calls. 
} \examples{ \donttest{ -# might fail if there is no internet connection or Zenodo.org not not available -# basic function call +# This function may fail if there is no internet connection or Zenodo.org is not available + +# Basic function call to retrieve the main data table +tox_data <- stx_data() -dt = stx_data() +# Inspect the first few rows and column names to understand the structure +head(tox_data) +colnames(tox_data) +# Example of using a permanent directory to cache the data +# The message "Retrieving Standartox data.." will only appear on the first download. +my_dir <- file.path("~", "my_stx_data") +cached_data <- stx_data(stx_dir = my_dir, silent = FALSE) } } \author{ diff --git a/man/stx_query.Rd b/man/stx_query.Rd index 8ea1edd..71e04f7 100644 --- a/man/stx_query.Rd +++ b/man/stx_query.Rd @@ -2,18 +2,22 @@ % Please edit documentation in R/standartox.R \name{stx_query} \alias{stx_query} -\title{Query Standartox Toxicity Data} +\title{Query and Filter Standartox Toxicity Data} \usage{ stx_query( cas_number = NULL, endpoint_group = c("XX50", "NOEX", "LOEX"), + endpoint_qualifier = "=", + endpoint = NULL, exposure = NULL, effect = NULL, + measurement = NULL, duration = c(0, Inf), duration_unit = "h", concentration_unit = NULL, concentration_type = NULL, - tax_columns = c("group", "taxon", "genus", "family"), + organism_lifestage = NULL, + tax_columns = c("tax_group", "tax_taxon", "tax_genus", "tax_family"), tax_genus = NULL, tax_family = NULL, tax_order = NULL, @@ -26,85 +30,105 @@ stx_query( ) } \arguments{ -\item{cas_number}{character; Optional. Vector of CAS numbers to filter chemicals (e.g. \code{c("1071-83-6","63-25-2","138261-41-3")}). Default is \code{NULL} (no filtering).} +\item{cas_number}{character; Optional vector of CAS numbers to filter by (e.g., \code{c("1071-83-6", "63-25-2")}). If `NULL` (default), results for all chemicals are returned.} -\item{endpoint_group}{character; Optional. Endpoint group(s) to filter results. 
All possible endpoint groups can be checked via \code{stx_catalog()$endpoint_group}. Default is \code{c("XX50", "NOEX", "LOEX")}.} +\item{endpoint_group}{character; Optional vector of endpoint groups to filter results. See \code{stx_catalog()$endpoint_group} for all possible values. Defaults to \code{c("XX50", "NOEX", "LOEX")}.} -\item{exposure}{character; Optional. Vector of exposure types (e.g. \code{"aquatic"}). All possible exposure types can be checked via \code{stx_catalog()$exposure}. Default is \code{NULL}.} +\item{endpoint_qualifier}{character; Optional vector of endpoint value qualifiers (e.g., \code{c(">", "<=")}). Common values are "=", ">", "<", "~", "<=", ">=". Defaults to \code{"="}. Set to `NULL` to include all qualifiers.} -\item{effect}{character; Optional. Vector of effect types (e.g. \code{"Mortality", "Growth"}). All possible effect types can be checked via \code{stx_catalog()$effect}. Default is \code{NULL}.} +\item{endpoint}{character; Optional vector of specific endpoints to filter by (e.g., "LC50", "NOEC"). If `NULL` (default), no endpoint filtering is applied.} -\item{duration}{numeric; Optional. Numeric vector of length two specifying minimum and maximum test duration (in hours), e.g. \code{c(0, 48)}. Default is \code{c(0, Inf)}.} +\item{exposure}{character; Optional vector of exposure types. See \code{stx_catalog()$exposure}. If `NULL` (default), no exposure type filtering is applied.} -\item{duration_unit}{character; Optional. Filter by duration unit (e.g. \code{"h"} for hours). All possible duration units can be checked via \code{stx_catalog()$duration_unit}. Set to \code{NULL} to keep all. Default is \code{"h"}.} +\item{effect}{character; Optional vector of observed effects. See \code{stx_catalog()$effect}. If `NULL` (default), no effect filtering is applied.} -\item{concentration_unit}{character; Optional. Filter by concentration unit (e.g. \code{"g/l"}). 
All possible concentration units can be checked via \code{stx_catalog()$concentration_unit}. Default is \code{NULL}.} +\item{measurement}{character; Optional vector of measurement types. See \code{stx_catalog()$measurement}. If `NULL` (default), no measurement filtering is applied.} -\item{concentration_type}{character; Optional. Filter by concentration type (e.g. \code{"active ingredient"}). All possible concentration types can be checked via \code{stx_catalog()$concentration_type}. Default is \code{NULL}.} +\item{duration}{numeric; A numeric vector of length two specifying the duration range (in hours) to filter by, e.g., \code{c(24, 96)}. Defaults to \code{c(0, Inf)}, including all durations.} -\item{tax_columns}{character; Columns of taxonomic information to append to results. All possible columns can be checked via \code{colnames(stx_taxa())}. Default is \code{c("group", "taxon", "genus", "family")}.} +\item{duration_unit}{character; Filter by the unit of duration. See \code{stx_catalog()$duration_unit}. Defaults to \code{"h"} (hours). Set to `NULL` to keep all units.} -\item{tax_genus}{character; Optional. Filter by genus. All possible genera can be checked via \code{stx_catalog()$genus}. Default is \code{NULL}.} +\item{concentration_unit}{character; Optional vector of concentration units. See \code{stx_catalog()$concentration_unit}. If `NULL` (default), no filtering is applied.} -\item{tax_family}{character; Optional. Filter by family. All possible families can be checked via \code{stx_catalog()$family}. Default is \code{NULL}.} +\item{concentration_type}{character; Optional vector of concentration types. See \code{stx_catalog()$concentration_type}. If `NULL` (default), no filtering is applied.} -\item{tax_order}{character; Optional. Filter by order. All possible orders can be checked via \code{stx_catalog()$order}. Default is \code{NULL}.} +\item{organism_lifestage}{character; Optional vector of organism lifestages. 
See \code{stx_catalog()$organism_lifestage}. If `NULL` (default), no lifestage filtering is applied.} -\item{tax_class}{character; Optional. Filter by class. All possible classes can be checked via \code{stx_catalog()$class}. Default is \code{NULL}.} +\item{tax_columns}{character; A vector of taxonomic column names to append to the results. See \code{colnames(stx_taxa())} for all options. Defaults to \code{c("tax_group", "tax_taxon", "tax_genus", "tax_family")}.} -\item{tax_group}{character; Optional. Filter by one or more ecotoxicological groups. Possible values are \code{"invertebrate"}, \code{"plant"}, \code{"fish"}, \code{"fungi"}, \code{"algae"}, \code{"aves"}, \code{"amphibia"}, \code{"mammalia"}, \code{"reptilia"}, \code{"macrophyte"}. All possible ecotox groups can be checked via \code{stx_catalog()$group}. Multiple entries possible. Default is \code{NULL}.} +\item{tax_genus}{character; Optional vector of genera to filter by. See \code{stx_catalog()$genus}. If `NULL` (default), no genus filtering is applied.} -\item{include_reference}{logical; If \code{TRUE}, append reference information. Default is \code{FALSE}.} +\item{tax_family}{character; Optional vector of families to filter by. See \code{stx_catalog()$family}. If `NULL` (default), no family filtering is applied.} -\item{rm_NR}{logical; If \code{TRUE}, remove rows with "NR" (not reported) values. Default is \code{TRUE}.} +\item{tax_order}{character; Optional vector of orders to filter by. See \code{stx_catalog()$order}. If `NULL` (default), no order filtering is applied.} -\item{verbose}{logical; If \code{TRUE}, print progress messages. Default is \code{FALSE}.} +\item{tax_class}{character; Optional vector of classes to filter by. See \code{stx_catalog()$class}. If `NULL` (default), no class filtering is applied.} -\item{...}{Additional arguments passed to \code{stx_download}.} +\item{tax_group}{character; Optional vector of ecotoxicological groups to filter by. 
See \code{stx_catalog()$tax_group} for valid inputs (e.g., "invertebrate", "fish"). If `NULL` (default), no group filtering is applied.} + +\item{include_reference}{logical; If `TRUE`, reference information (author, year, title) is appended to the results. Defaults to \code{FALSE}.} + +\item{rm_NR}{logical; If `TRUE` (default), rows with "NR" (not reported) values in the critical `endpoint` and `duration_unit` columns are removed early in the query process. Note that at the end of the query, all remaining "NR" values in any character column are converted to `NA`.} + +\item{verbose}{logical; If `TRUE`, prints messages detailing the query progress. Defaults to \code{FALSE}.} + +\item{...}{Additional arguments to be passed on to \code{\link{stx_download}}, such as `stx_dir` to specify a cache directory.} } \value{ -Returns a \code{data.table} with filtered Standartox toxicity data. +A \code{data.table} containing the filtered Standartox toxicity data. If the query results in zero matches, the function returns `NULL` and issues a warning. } \description{ -Retrieve and filter toxicity data from the Standartox database (\url{https://doi.org/10.5281/zenodo.3785030}) using chemical, experimental, and taxonomic criteria. +Retrieves and filters toxicity data from the Standartox database (\url{https://doi.org/10.5281/zenodo.3785030}). This function acts as a powerful front-end for subsetting the database based on chemical, experimental, and taxonomic criteria. } -\examples{ -\donttest{ +\details{ +The function operates in a sequential process: +1. It first downloads the necessary data tables (`test_fin`, `phch`, `taxa`, and optionally `refs`) using \code{\link{stx_download}}. +2. It performs initial, fast filtering based on `endpoint_group`, `endpoint_qualifier`, and `duration_unit`. +3. It then appends chemical and taxonomic information, filtering by `cas_number` and the `tax_*` parameters. +4. 
Finally, it applies the remaining experimental filters (`effect`, `duration`, `concentration_unit`, etc.). -Basic stx_query() call: Will return results filtered for default endpoint_group = c("XX50", "NOEX", "LOEX") and duration_unit = "h" -stx_query(verbose = T) +By default, filters are combined with a logical "AND". For example, specifying a `tax_genus` and an `effect` will return only records that match both criteria. -# If you wish to filter for different endpoint groups, you can specify them in the query. -stx_catalog()$endpoint_group # to view available endpoint groups -stx_query(endpoint_group = c("Bioconc","MATC","MCIG")) - -# Query for a specific CAS number, endpoint group, and tax group(s) -stx_query( - cas_number = "1071-83-6", - endpoint_group = c("NOEX","LOEX"), - duration = c(0, 120), - ecotox_group = c("invertebrate", "fish", "algae"), +`stx_catalog()` is a helper function that provides a list of all valid filter values for many of this function's parameters. +} +\examples{ +\donttest{ +# This function may fail if there is no internet connection or Zenodo.org is not available + +# Basic query using default filters (XX50, NOEX, LOEX endpoints in hours) +# Using verbose=TRUE to see the process +results_default <- stx_query(verbose = TRUE) + +# To see available filter options, use the catalog function +catalog <- stx_catalog() +print(catalog$endpoint_group) + +# Query for a specific CAS number and multiple specific endpoints for key taxonomic groups +q1 <- stx_query( + cas_number = "1071-83-6", # Glyphosate + endpoint = c("LC50", "EC50", "LOEC", "NOEC"), + duration = c(0, 120), # Up to 120 hours + tax_group = c("invertebrate", "fish", "algae") ) -# get ALL LC50 values for 96 - 120 h of exposure for zebra fish (Danio rerio) -stx_query( - endpoint_group = "XX50", - duration = c(96, 120), - effect = "mortality", - concentration_unit = "g/l", - concentration_type = "active ingredient", +# Get all >LC50 values for Zebra fish (Danio rerio) embryos or larvae, 
+# including reference information +q2 <- stx_query( + endpoint = "LC50", + endpoint_qualifier = ">", + duration = c(72, 120), tax_genus = "Danio", - include_reference = TRUE + organism_lifestage = c("Embryo", "Larva"), + include_reference = TRUE, + verbose = TRUE ) -# get ALL LC50 values for 24 - 48 h of exposure for the family of Daphniidae -stx_query( +# Get all XX50 acute toxicity values for the family Daphniidae related to mobility +q3 <- stx_query( endpoint_group = "XX50", duration = c(24, 48), - effect = "mortality", - concentration_unit = "g/l", - concentration_type = "active ingredient", - tax_family = "Daphniidae" + measurement = c("immobile", "mobility", "swimming"), + tax_family = "Daphniidae", include_reference = TRUE ) } diff --git a/man/stx_taxa.Rd b/man/stx_taxa.Rd index 6d363a7..6225963 100644 --- a/man/stx_taxa.Rd +++ b/man/stx_taxa.Rd @@ -2,34 +2,45 @@ % Please edit documentation in R/standartox.R \name{stx_taxa} \alias{stx_taxa} -\title{Retrieve taxa data} +\title{Retrieve the Standartox Taxonomy Table} \usage{ stx_taxa(silent = FALSE, stx_dir = file.path(tempdir(), "standartox")) } \arguments{ -\item{silent}{logical; If TRUE, suppresses messages. Default is FALSE.} +\item{silent}{logical; If `TRUE`, suppresses progress messages during download. Defaults to `FALSE`.} -\item{stx_dir}{character; Directory to which the taxa information should be downloaded. Default is a temporary directory.} +\item{stx_dir}{character; Directory where the taxonomy data file is stored or should be downloaded. Defaults to a "standartox" subdirectory within the session's temporary directory (\code{file.path(tempdir(), "standartox")}).} } \value{ -Returns a data.table containing informaiton on taxa in Standartox. +A \code{data.table} containing taxonomic classifications, with columns such as `tax_genus`, `tax_family`, `tax_order`, and `tax_group`. } \description{ -Retrieve data on all taxa in Standartox. 
+Provides direct access to the \code{taxa} data table, which contains the complete taxonomic classification for every species in the Standartox database. +} +\details{ +This function is a simple wrapper for \code{stx_download(data_type = 'taxa')}. It is designed for users who want to explore the full taxonomic scope of the database or find valid inputs for the \code{tax_genus}, \code{tax_family}, and \code{tax_group} parameters in \code{\link{stx_query}}. + +Like other download functions in this package, it caches the data file locally to avoid re-downloading on subsequent calls. } \examples{ \donttest{ -# might fail if there is no internet connection or Zenodo.org not not available -# basic function call -df = stx_taxa() +# This function may fail if there is no internet connection or Zenodo.org is not available + +# Basic function call to retrieve the taxonomy data table +taxa_data <- stx_taxa() -# to get verbose output from the function -df = stx_taxa(silent = FALSE) +# Inspect the first few rows and see all available columns +head(taxa_data) +colnames(taxa_data) + +# Find all unique ecotoxicological groups in the database +if (nrow(taxa_data) > 0) { + print(unique(taxa_data$tax_group)) +} -# to specify a directory to which the taxa information should be downloaded -df = stx_taxa(silent = FALSE, stx_dir = "~/tmp") -# This will create a directory under ~/tmp and download the respective standartox file to that directory. -# The files are then permanently stored in that directory and can be directly read when restarting your R session. +# Example of using a permanent directory to cache the data file +my_dir <- file.path("~", "my_stx_data") +cached_taxa_data <- stx_taxa(stx_dir = my_dir, silent = FALSE) } }