Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TOXVAL-798 #193

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 8 additions & 18 deletions R/check.toxval_type.route.units.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ check.toxval_type.route.units <- function(toxval.db,
output_file = paste0("Repo/QC Reports/toxval_type.route.units_to_check_", source, "_", Sys.Date(), ".xlsx")
}

source_string = slist %>%
paste0(., collapse="', '")

# Handle addition of subsource for queries and output_file
query_addition = ""
if(!is.null(subsource)) {
Expand All @@ -30,24 +33,11 @@ check.toxval_type.route.units <- function(toxval.db,
source, "_", subsource, "_", Sys.Date(), ".xlsx")
}

# Track all type/route/units combinations identified
all_combinations = tibble::tibble(
source = character(),
study_type = character(),
exposure_route = character(),
toxval_type = character(),
toxval_units = character(),
)

for(source in slist) {
# Get all type/route/units combinations for current source
curr_combo_query = paste0("SELECT DISTINCT source, study_type, exposure_route, toxval_type, toxval_units ",
"FROM toxval WHERE source ='", source, "'", query_addition)
all_combinations = runQuery(curr_combo_query, toxval.db) %>%
# Append current source data to cumulative DF
dplyr::bind_rows(all_combinations) %>%
dplyr::distinct()
}
# Get all type/route/units combinations for every requested source in a single query
curr_combo_query = paste0("SELECT DISTINCT source, study_type, exposure_route, toxval_type, toxval_units ",
"FROM toxval WHERE source in ('",source_string,"')", query_addition)
all_combinations = runQuery(curr_combo_query, toxval.db) %>%
dplyr::distinct()

# Read in dictionary containing expected combinations, if specified
if(is.null(load.dict)) load.dict = "not a file"
Expand Down
135 changes: 68 additions & 67 deletions R/export.all.by.source.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,81 +31,82 @@ export.all.by.source <- function(toxval.db, source=NULL, subsource=NULL) {
}

if(!is.null(source)) slist=source
source_string = slist %>%
paste0(collapse="', '")

# Handle addition of subsource for queries
query_addition = ""
if(!is.null(subsource)) {
query_addition = paste0(" and subsource='", subsource, "'")
query_addition = paste0(" and b.subsource='", subsource, "'")
}

for(src in slist) {
query = paste0("SELECT
a.dtxsid,a.casrn,a.name,a.cleaned_casrn, a.cleaned_name,
b.source,b.subsource,
b.qc_status,
b.study_group,
b.risk_assessment_class,
b.human_eco,
b.toxval_type,b.toxval_type_original,
b.toxval_subtype,
e.toxval_type_supercategory,
b.toxval_numeric_qualifier,b.toxval_numeric,b.toxval_units,
b.toxval_numeric_original,b.toxval_units_original,
b.study_type,b.study_type_original,
b.study_duration_class,b.study_duration_class_original,
b.study_duration_value,b.study_duration_value_original,
b.study_duration_units,b.study_duration_units_original,
b.species_id,b.species_original,d.common_name,d.latin_name,d.ecotox_group,d.habitat,
b.strain,b.strain_group,b.strain_original,
b.sex,b.sex_original,
b.generation,b.lifestage,
b.exposure_route,b.exposure_route_original,
b.exposure_method,b.exposure_method_original,
b.exposure_form,b.exposure_form_original,
b.media,b.media_original,
b.critical_effect,
b.critical_effect_original,
b.year,
b.datestamp,
f.long_ref,
f.title,
f.author,
f.journal,
f.volume,
f.year,
f.issue,
f.url,
f.document_name,
e.toxval_type_category,
b.source_url,b.subsource_url,
b.toxval_id,b.source_hash,b.source_table,
b.details_text,
b.chemical_id,
b.priority_id
FROM
toxval b
INNER JOIN source_chemical a on a.chemical_id=b.chemical_id
LEFT JOIN species d on b.species_id=d.species_id
INNER JOIN toxval_type_dictionary e on b.toxval_type=e.toxval_type
INNER JOIN record_source f on b.toxval_id=f.toxval_id
WHERE
b.source='",src,"'")
if(!is.null(subsource)) {
query = paste0(query, " and b.subsource='",subsource,"'")
}
query = paste0("SELECT
a.dtxsid,a.casrn,a.name,a.cleaned_casrn, a.cleaned_name,
b.source,b.subsource,
b.qc_status,
b.study_group,
b.risk_assessment_class,
b.human_eco,
b.toxval_type,b.toxval_type_original,
b.toxval_subtype,
e.toxval_type_supercategory,
b.toxval_numeric_qualifier,b.toxval_numeric,b.toxval_units,
b.toxval_numeric_original,b.toxval_units_original,
b.study_type,b.study_type_original,
b.study_duration_class,b.study_duration_class_original,
b.study_duration_value,b.study_duration_value_original,
b.study_duration_units,b.study_duration_units_original,
b.species_id,b.species_original,d.common_name,d.latin_name,d.ecotox_group,d.habitat,
b.strain,b.strain_group,b.strain_original,
b.sex,b.sex_original,
b.generation,b.lifestage,
b.exposure_route,b.exposure_route_original,
b.exposure_method,b.exposure_method_original,
b.exposure_form,b.exposure_form_original,
b.media,b.media_original,
b.critical_effect,
b.critical_effect_original,
b.year,
b.datestamp,
f.long_ref,
f.title,
f.author,
f.journal,
f.volume,
f.year,
f.issue,
f.url,
f.document_name,
e.toxval_type_category,
b.source_url,b.subsource_url,
b.toxval_id,b.source_hash,b.source_table,
b.details_text,
b.chemical_id,
b.priority_id
FROM
toxval b
INNER JOIN source_chemical a on a.chemical_id=b.chemical_id
LEFT JOIN species d on b.species_id=d.species_id
INNER JOIN toxval_type_dictionary e on b.toxval_type=e.toxval_type
INNER JOIN record_source f on b.toxval_id=f.toxval_id
WHERE
b.source IN ('",source_string,"')",
query_addition)

mat = runQuery(query,toxval.db,T,F)
mat[is.na(mat$casrn),"casrn"] = mat[is.na(mat$casrn),"cleaned_casrn"]
mat[mat$casrn=='-',"casrn"] = mat[mat$casrn=='-',"cleaned_casrn"]
mat[is.na(mat$name),"name"] = mat[is.na(mat$name),"cleaned_name"]
mat[mat$name=='-',"name"] = mat[mat$name=='-',"cleaned_name"]
cremove = c("cleaned_name","cleaned_casrn")
mat = mat[ , !(names(mat) %in% cremove)]
mat = unique(mat)

mat = runQuery(query,toxval.db,T,F)
mat[is.na(mat$casrn),"casrn"] = mat[is.na(mat$casrn),"cleaned_casrn"]
mat[mat$casrn=='-',"casrn"] = mat[mat$casrn=='-',"cleaned_casrn"]
mat[is.na(mat$name),"name"] = mat[is.na(mat$name),"cleaned_name"]
mat[mat$name=='-',"name"] = mat[mat$name=='-',"cleaned_name"]
cremove = c("cleaned_name","cleaned_casrn")
mat = mat[ , !(names(mat) %in% cremove)]
mat = unique(mat)
cat(src,nrow(mat),"\n")
for(src in slist) {
src_mat = dplyr::filter(mat, source == !!src)
cat(src,nrow(src_mat),"\n")
file = paste0(dir,"/toxval_all_",toxval.db,"_",src, " ", subsource, ".xlsx") %>%
gsub(" \\.xlsx", ".xlsx", .)
sty = openxlsx::createStyle(halign="center",valign="center",textRotation=90,textDecoration = "bold")
openxlsx::write.xlsx(mat,file,firstRow=T,headerStyle=sty)
writexl::write_xlsx(src_mat,file)
}
}
24 changes: 12 additions & 12 deletions R/fill.toxval.defaults.global.by.source.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@ fill.toxval.defaults.global.by.source <- function(toxval.db, source=NULL, subsou
query_addition = paste0(" and subsource='", subsource, "'")
}

slist = runQuery("select distinct source from toxval",toxval.db)[,1]
if(!is.null(source)) slist = source
for(source in slist) {
cat(source,"\n")
# For each column, set NA or empty values to "-"
for(col in col.list){
n <- runQuery(paste0("select count(*) from toxval where ",col," ='' and source = '",source,"'",query_addition) ,toxval.db)[1,1]
if(n>0) {
cat(col,n,"\n")
query <- paste0("update toxval set ",col,"='-' where ",col," ='' and source = '",source,"'",query_addition)
runQuery(query,toxval.db)
}
slist = source
if(is.null(source)) slist = runQuery("select distinct source from toxval",toxval.db)[,1]
source_string = slist %>%
paste0(., collapse="', '")

# For each column, set NA or empty values to "-"
for(col in col.list){
n <- runQuery(paste0("select count(*) from toxval where ",col," ='' and source in ('",source_string,"')",query_addition) ,toxval.db)[1,1]
if(n>0) {
cat(col,n,"\n")
query <- paste0("update toxval set ",col,"='-' where ",col," ='' and source in ('",source_string,"')",query_addition)
runQuery(query,toxval.db)
}
}
}
149 changes: 78 additions & 71 deletions R/fix.all.param.by.source.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,11 @@ fix.all.param.by.source <- function(toxval.db, source=NULL,subsource=NULL, fill.
}
full_dict = full_dict[!is.na(full_dict$term_original),]
#print(View(full_dict))
slist = runQuery("select distinct source from toxval",toxval.db)[,1]
if(!is.null(source)) slist = source
slist = source
if(is.null(source)) slist = runQuery("select distinct source from toxval",toxval.db)[,1]
slist = sort(slist)
source_string = slist %>%
paste0(., collapse="', '")

# Handle addition of subsource for queries
query_addition = ""
Expand All @@ -56,78 +59,82 @@ fix.all.param.by.source <- function(toxval.db, source=NULL,subsource=NULL, fill.
}

# slist = slist[!is.element(slist,c("ECOTOX","ECHA IUCLID"))]
slist = sort(slist)

for(source in slist) {
cat("\n-----------------------------------------------------\n")
cat(source,subsource,"\n")
cat("-----------------------------------------------------\n")
cat("perform extra processes that require matching between fields\n")
fix.exposure.params(toxval.db, source,subsource)
fix.study_duration.params(toxval.db, source,subsource)
fix.generation.by.source(toxval.db, source,subsource)

cat(" deal with quotes in strings\n")
cat(" exposure_method\n")
runQuery(paste0("update toxval SET exposure_method"," = ", "REPLACE", "( exposure_method", ",\'\"\',", " \"'\" ) WHERE exposure_method"," LIKE \'%\"%\' and source = '",source,"'",query_addition),toxval.db)
cat(" exposure_route\n")
runQuery(paste0("update toxval SET exposure_route"," = ", "REPLACE", "( exposure_route", ",\'\"\',", " \"'\" ) WHERE exposure_route"," LIKE \'%\"%\' and source = '",source,"'",query_addition),toxval.db)
cat(" media\n")
runQuery(paste0("update toxval SET media"," = ", "REPLACE", "( media", ",\'\"\',", " \"'\" ) WHERE media"," LIKE \'%\"%\' and source = '",source,"'",query_addition),toxval.db)
cat(" study_type\n")
runQuery(paste0("update toxval SET study_type"," = ", "REPLACE", "( study_type", ",\'\"\',", " \"'\" ) WHERE study_type"," LIKE \'%\"%\' and source = '",source,"'",query_addition),toxval.db)
cat(" iterate through the full_dict\n")
flist = unique(full_dict$field)
for(field in flist) {
cat(" ",field,"\n")
sdict = full_dict[full_dict$field==field,]
query = paste0("select distinct ",field,"_original from toxval where source='",source,"'",query_addition)
terms = runQuery(query,toxval.db)[,1]
if(length(terms)>0) {
sdict = sdict[is.element(sdict$term_original,terms),]
if(nrow(sdict)>0) {
for(i in 1:nrow(sdict)) {
original = sdict[i,"term_original"]
final = sdict[i,"term_final"]
#if(field=="toxval_units") cat(original,final,"\n")
query = paste0("update toxval set ",field,"=\"",final,"\" where ",field,"_original=\"",original,"\" and source = '",source,"'",query_addition)
runQuery(query, toxval.db)
}
cat("\n-----------------------------------------------------\n")
cat(gsub("', '", ", ", source_string),subsource,"\n")
cat("-----------------------------------------------------\n")
cat("perform extra processes that require matching between fields\n")
fix.exposure.params(toxval.db, slist, subsource)
fix.study_duration.params(toxval.db, slist, subsource)
fix.generation.by.source(toxval.db, slist, subsource)

cat(" deal with quotes in strings\n")
cat(" exposure_method\n")
runQuery(paste0("update toxval SET exposure_method"," = ", "REPLACE", "( exposure_method", ",\'\"\',", " \"'\" ) WHERE exposure_method"," LIKE \'%\"%\' and source in ('",source_string,"')",query_addition),toxval.db)
cat(" exposure_route\n")
runQuery(paste0("update toxval SET exposure_route"," = ", "REPLACE", "( exposure_route", ",\'\"\',", " \"'\" ) WHERE exposure_route"," LIKE \'%\"%\' and source in ('",source_string,"')",query_addition),toxval.db)
cat(" media\n")
runQuery(paste0("update toxval SET media"," = ", "REPLACE", "( media", ",\'\"\',", " \"'\" ) WHERE media"," LIKE \'%\"%\' and source in ('",source_string,"')",query_addition),toxval.db)
cat(" study_type\n")
runQuery(paste0("update toxval SET study_type"," = ", "REPLACE", "( study_type", ",\'\"\',", " \"'\" ) WHERE study_type"," LIKE \'%\"%\' and source in ('",source_string,"')",query_addition),toxval.db)
cat(" iterate through the full_dict\n")
flist = unique(full_dict$field)
for(field in flist) {
cat(" ",field,"\n")
sdict = full_dict[full_dict$field==field,]
query = paste0("select distinct ",field,"_original from toxval where source in ('",source_string,"')",query_addition)
terms = runQuery(query,toxval.db)[,1]
if(length(terms)>0) {
sdict = sdict[is.element(sdict$term_original,terms),]
if(nrow(sdict)>0) {
for(i in 1:nrow(sdict)) {
original = sdict[i,"term_original"]
final = sdict[i,"term_final"]
#if(field=="toxval_units") cat(original,final,"\n")
query = paste0("update toxval set ",field,"=\"",final,"\" where ",field,"_original=\"",original,"\" and source in ('",source_string,"')",query_addition)
runQuery(query, toxval.db)
}
}
}

query <- paste0("update toxval set ",field,"='-' where ",field,"_original is NULL and source = '",source,"'",query_addition)
runQuery(query, toxval.db)

cat(" expoure route\n")
query <- paste0("update toxval
set exposure_route = 'inhalation'
where toxval_type in ('RFCi', 'Inhalation Unit Risk', 'IUR', 'Inhalation UR', 'Inhalation TC', 'Inhalation SF') and source = '",source,"'",query_addition)
runQuery(query, toxval.db)

query = paste0("update toxval
set exposure_route = 'oral'
where toxval_type in ('RFDo', 'Oral Slope Factor', 'oral TDI', 'oral SF', 'oral ADI', 'LDD50 (Lethal Dietary Dose)') and source = '",source,"'",query_addition)
runQuery(query, toxval.db)

cat(" study_duration_class\n")
query = paste0("update toxval
set study_duration_class = 'acute'
where toxval_type in ('ARFD', 'ARFD (group)', 'AAOEL') and source = '",source,"'",query_addition)

query = paste0("update toxval
set study_duration_class = 'chronic'
where toxval_type like 'Chronic%' and source = '",source,"'",query_addition)
runQuery(query, toxval.db)

cat(" toxval_subtype\n")
query = paste0("UPDATE toxval SET toxval_subtype = '-' ",
"WHERE toxval_subtype IN ('chronic', 'subchronic', 'intermediate', 'acute', 'developmental') ",
"AND toxval_type IN (SELECT DISTINCT toxval_type FROM toxval_type_dictionary ",
"WHERE toxval_type_supercategory = 'Point of Departure')")
runQuery(query, toxval.db)

export.missing.dictionary.entries(toxval.db,source,subsource)
}

query <- paste0("update toxval set ",field,"='-' where ",field,"_original is NULL and source in ('",source_string,"')",query_addition)
runQuery(query, toxval.db)

cat(" expoure route\n")
query <- paste0("update toxval
set exposure_route = 'inhalation'
where toxval_type in ('RFCi', 'Inhalation Unit Risk', 'IUR', 'Inhalation UR', 'Inhalation TC', 'Inhalation SF') and source in ('",source_string,"')",query_addition)
runQuery(query, toxval.db)

query = paste0("update toxval
set exposure_route = 'oral'
where toxval_type in ('RFDo', 'Oral Slope Factor', 'oral TDI', 'oral SF', 'oral ADI', 'LDD50 (Lethal Dietary Dose)') and source in ('",source_string,"')",query_addition)
runQuery(query, toxval.db)

query = paste0("update toxval ",
"SET exposure_route = 'oral' ",
"WHERE (exposure_route = '-' or exposure_route_original = '-') and toxval_units = 'mg/kg-day' and ",
"(toxval_type in ('NEL', 'LEL', 'LOEL', 'NOEL', 'NOAEL', 'LOAEL') or toxval_type like 'BMD%') and ",
"source in ('",source_string,"')",query_addition)
runQuery(query, toxval.db)

cat(" study_duration_class\n")
query = paste0("update toxval
set study_duration_class = 'acute'
where toxval_type in ('ARFD', 'ARFD (group)', 'AAOEL') and source in ('",source_string,"')",query_addition)

query = paste0("update toxval
set study_duration_class = 'chronic'
where toxval_type like 'Chronic%' and source in ('",source_string,"')",query_addition)
runQuery(query, toxval.db)

cat(" toxval_subtype\n")
query = paste0("UPDATE toxval SET toxval_subtype = '-' ",
"WHERE toxval_subtype IN ('chronic', 'subchronic', 'intermediate', 'acute', 'developmental') ",
"AND toxval_type IN (SELECT DISTINCT toxval_type FROM toxval_type_dictionary ",
"WHERE toxval_type_supercategory = 'Point of Departure') AND source in ('",source_string,"')")
runQuery(query, toxval.db)

export.missing.dictionary.entries(toxval.db,slist,subsource)
}
Loading