diff --git a/CHANGELOG.md b/CHANGELOG.md index f0f53d5..e69aa83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,25 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [0.5.0] - February 23rd, 2024 + +Minor release adding new features and fixes + +### Added + +- Added Getters for Drug Exposures + + * GetDrugExposureStartDate + + * GetDrugExposureEndDate + +- Misc + + * Minor fixes in GetPatientAgeGroup Function + +- Extensive test suite for new features + + ## [0.4.0] - December 1st, 2023 Patch release to add in some new features and fixes diff --git a/CITATION.cff b/CITATION.cff index a2e6a12..b5445be 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -10,7 +10,7 @@ authors: given-names: Fareeda - family-names: Sanjay given-names: Jay -title: "OMOPCDMCohortCreator 0.4.0" -version: 0.4.0 +title: "OMOPCDMCohortCreator 0.5.0" +version: 0.5.0 doi: 10.5281/zenodo.7052105 -date-released: 2023-12-01 +date-released: 2024-02-23 diff --git a/Project.toml b/Project.toml index c788790..f886c27 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "OMOPCDMCohortCreator" uuid = "f525a15e-a73f-4eef-870f-f901257eae22" authors = ["Jacob Zelko ", "Fareeda Abdelazeez", "Varshini Chinta", "Jay Sanjay "] -version = "0.4.0" +version = "0.5.0" [deps] DBInterface = "a10d1c49-ce27-4219-8d33-6db1a4562965" diff --git a/docs/src/api.md b/docs/src/api.md index 2c126b6..890852c 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -32,6 +32,8 @@ GetCohortSubjects GetCohortSubjectStartDate GetCohortSubjectEndDate GetVisitProcedure +GetDrugExposureEndDate +GetDrugExposureStartDate ``` ## Filters diff --git a/src/getters.jl b/src/getters.jl index 5df2887..7e6ce34 100644 --- a/src/getters.jl +++ b/src/getters.jl @@ -432,6 +432,7 @@ GetPatientAgeGroup( [80, 89], ], tab = person, + ungrouped_label = "Unspecified" ) Finds all individuals in age groups as specified by 
`age_groupings`. @@ -451,6 +452,8 @@ Finds all individuals in age groups as specified by `age_groupings`. - any year provided by a user as long as it is an `Integer` (such as 2022, 1998, etc.) - `tab` - the `SQLTable` representing the Person table; default `person` + +- `ungrouped_label` - the label assigned to persons whose age does not fall within any of the provided age groups; default "Unspecified" # Returns @@ -483,10 +486,11 @@ function GetPatientAgeGroup( [70, 79], [80, 89], ], - tab=person + tab=person, + ungrouped_label = "Unspecified" ) - df = DBInterface.execute(conn, GetPatientAgeGroup(ids; minuend=minuend, age_groupings=age_groupings, tab=tab)) |> DataFrame + df = DBInterface.execute(conn, GetPatientAgeGroup(ids; minuend=minuend, age_groupings=age_groupings, tab=tab, ungrouped_label=ungrouped_label)) |> DataFrame return df @@ -504,7 +508,9 @@ age_groupings=[ [60, 69], [70, 79], [80, 89], -], tab = person) +], +tab = person, +ungrouped_label = "Unspecified") Given a `DataFrame` with a `:person_id` column, return the `DataFrame` with an associated `:ageGroup` for each person in the `DataFrame` @@ -515,13 +521,13 @@ function GetPatientAgeGroup( df::DataFrame, conn; minuend=:now, - tab=person + tab=person, + ungrouped_label = "Unspecified" ) df_ids= df[:,"person_id"] - - return outerjoin(GetPatientAgeGroup(df_ids, conn; minuend=minuend, tab=tab), df, on = :person_id) + return outerjoin(GetPatientAgeGroup(df_ids, conn; minuend=minuend, tab=tab, ungrouped_label=ungrouped_label), df, on = :person_id) end """ @@ -540,6 +546,7 @@ GetPatientAgeGroup( [80, 89], ], tab = person, + ungrouped_label = "Unspecified" ) Return SQL statement that assigns an age group to each patient in a given patient list. @@ -559,6 +566,8 @@ Customized age groupings can be provided as a list. 
- `tab` - the `SQLTable` representing the Person table; default `person` +- `ungrouped_label` - the label assigned to persons whose age does not fall within any of the provided age groups; default "Unspecified" + # Returns - `df::DataFrame` - a two column `DataFrame` comprised of columns: `:person_id` and `:age_group` @@ -589,7 +598,8 @@ function GetPatientAgeGroup( [70, 79], [80, 89], ], - tab=person + tab=person, + ungrouped_label = "Unspecified" ) minuend = _determine_calculated_year(minuend) @@ -603,7 +613,7 @@ function GetPatientAgeGroup( sql = From(tab) |> Where(Fun.in(Get.person_id, ids...)) |> Select(Get.person_id, :age => minuend .- Get.year_of_birth) |> - Define(:age_group => Fun.case(age_arr...)) |> + Define(:age_group => Fun.case(age_arr..., ungrouped_label)) |> Select(Get.person_id, Get.age_group) |> q -> render(q, dialect=dialect) @@ -1885,4 +1895,174 @@ function GetDatabaseCohorts( end -export GetDatabasePersonIDs, GetPatientState, GetPatientGender, GetPatientRace, GetPatientAgeGroup, GetPatientVisits, GetMostRecentConditions, GetMostRecentVisit, GetVisitCondition, GetPatientEthnicity, GetDatabaseYearRange, GetVisitPlaceOfService, GetVisitConcept, GetVisitDate, GetDrugExposures, GetDrugConceptIDs, GetDrugAmounts, GetVisitProcedure, GetDatabaseCohorts, GetCohortSubjects, GetCohortSubjectStartDate, GetCohortSubjectEndDate, GetDrugExposureIDs +""" +function GetDrugExposureEndDate(drug_exposure_ids, conn; tab = drug_exposure) + +Given a list of drug_exposure IDs, find their exposure end dates. 
+ +# Arguments: + +- `drug_exposure_ids` - list of `drug_exposure_id`'s; each ID must be of subtype `Float64` + +- `conn` - database connection using DBInterface + + +# Keyword Arguments: + +- `tab` - the `SQLTable` representing the Drug Exposure table; default `drug_exposure` + +# Returns + +- `df::DataFrame` - a two column `DataFrame` comprised of columns: `:drug_exposure_id` and `:drug_exposure_end_date` +""" + +function GetDrugExposureEndDate( + drug_exposure_ids, + conn; + tab = drug_exposure +) + + df = DBInterface.execute(conn, GetDrugExposureEndDate(drug_exposure_ids; tab=tab)) |> DataFrame + + return df +end + +""" +function GetDrugExposureEndDate(df::DataFrame, conn; tab = drug_exposure) + +Given a DataFrame with a :drug_exposure_id column, return the DataFrame with an associated :drug_exposure_end_date corresponding to a given drug_exposure_id in the DataFrame. + +Multiple dispatch that accepts all other arguments like in `GetDrugExposureEndDate(ids, conn; tab = drug_exposure)` +""" + +function GetDrugExposureEndDate( + df::DataFrame, + conn; + tab = drug_exposure +) + + df_ids = df[:,"drug_exposure_id"] + + return outerjoin(GetDrugExposureEndDate(df_ids, conn; tab=tab), df, on = :drug_exposure_id) + +end + +""" +function GetDrugExposureEndDate(drug_exposure_ids; tab = drug_exposure) + +Given a list of drug_exposure IDs, find their corresponding drug exposure end dates. 
+ +# Arguments: + +- `drug_exposure_ids` - list of `drug_exposure_id`'s; each ID must be of subtype `Float64` + + +# Keyword Arguments: + +- `tab` - the `SQLTable` representing the Drug Exposure table; default `drug_exposure` + +# Returns + +- SQL statement comprised of: `:drug_exposure_id` and `:drug_exposure_end_date` +""" +function GetDrugExposureEndDate( + drug_exposure_ids; + tab = drug_exposure +) + + sql = + From(tab) |> + Where(Fun.in(Get.drug_exposure_id, drug_exposure_ids...)) |> + Select(Get.drug_exposure_id, Get.drug_exposure_end_date) |> + q -> render(q, dialect=dialect) + + return String(sql) + +end + +""" +function GetDrugExposureStartDate(drug_exposure_ids, conn; tab = drug_exposure) + +Given a list of drug_exposure IDs, find their exposure start dates. + +# Arguments: + +- `drug_exposure_ids` - list of `drug_exposure_id`'s; each ID must be of subtype `Float64` + +- `conn` - database connection using DBInterface + + +# Keyword Arguments: + +- `tab` - the `SQLTable` representing the Drug Exposure table; default `drug_exposure` + +# Returns + +- `df::DataFrame` - a two column `DataFrame` comprised of columns: `:drug_exposure_id` and `:drug_exposure_start_date` +""" + +function GetDrugExposureStartDate( + drug_exposure_ids, + conn; + tab = drug_exposure +) + + df = DBInterface.execute(conn, GetDrugExposureStartDate(drug_exposure_ids; tab=tab)) |> DataFrame + + return df +end + +""" +function GetDrugExposureStartDate(df::DataFrame, conn; tab = drug_exposure) + +Given a DataFrame with a :drug_exposure_id column, return the DataFrame with an associated :drug_exposure_start_date corresponding to a given drug_exposure_id in the DataFrame. 
+ +Multiple dispatch that accepts all other arguments like in `GetDrugExposureStartDate(ids, conn; tab = drug_exposure)` +""" +function GetDrugExposureStartDate( + df::DataFrame, + conn; + tab = drug_exposure +) + + df_ids = df[:,"drug_exposure_id"] + + return outerjoin(GetDrugExposureStartDate(df_ids, conn; tab=tab), df, on = :drug_exposure_id) + +end + +""" +function GetDrugExposureStartDate(drug_exposure_ids; tab = drug_exposure) + + + Given a list of drug_exposure IDs, find their corresponding drug exposure start dates. + + # Arguments: + + - `drug_exposure_ids` - list of `drug_exposure_id`'s; each ID must be of subtype `Float64` + + + # Keyword Arguments: + + - `tab` - the `SQLTable` representing the Drug Exposure table; default `drug_exposure` + + # Returns + + - SQL statement comprised of: `:drug_exposure_id` and `:drug_exposure_start_date` +""" +function GetDrugExposureStartDate( + drug_exposure_ids; + tab = drug_exposure +) + + sql = + From(tab) |> + Where(Fun.in(Get.drug_exposure_id, drug_exposure_ids...)) |> + Select(Get.drug_exposure_id, Get.drug_exposure_start_date) |> + q -> render(q, dialect=dialect) + + return String(sql) + +end + +export GetDatabasePersonIDs, GetPatientState, GetPatientGender, GetPatientRace, GetPatientAgeGroup, GetPatientVisits, GetMostRecentConditions, GetMostRecentVisit, GetVisitCondition, GetPatientEthnicity, GetDatabaseYearRange, GetVisitPlaceOfService, GetVisitConcept, GetVisitDate, GetDrugExposures, GetDrugConceptIDs, GetDrugAmounts, GetVisitProcedure, GetDatabaseCohorts, GetCohortSubjects, GetCohortSubjectStartDate, GetCohortSubjectEndDate, GetDrugExposureIDs, GetDrugExposureEndDate, GetDrugExposureStartDate diff --git a/test/sqlite/getters.jl b/test/sqlite/getters.jl index fd3c238..4d68cea 100644 --- a/test/sqlite/getters.jl +++ b/test/sqlite/getters.jl @@ -65,7 +65,7 @@ end push!(age_groups, default_age_grouping_values[idx]) break elseif ismissing(grouping) - push!(age_groups, missing) + push!(age_groups, 
"Unspecified") end end end @@ -74,7 +74,7 @@ end default_test = default_test[!, [:person_id, :age_group]] default_test.age_group = convert(Vector{Union{Missing,String}}, default_test.age_group) - minuend_now_test = DataFrame(:person_id => [6.0, 123.0, 129.0, 16.0, 65.0, 74.0, 42.0, 187.0, 18.0, 111.0], :age_group => ["55 - 59", "70 - 74", "45 - 49", "50 - 54", "55 - 59", "50 - 54", missing, "75 - 79", "55 - 59", "45 - 49"]) + minuend_now_test = DataFrame(:person_id => [6.0, 123.0, 129.0, 16.0, 65.0, 74.0, 42.0, 187.0, 18.0, 111.0], :age_group => ["55 - 59", "70 - 74", "45 - 49", "50 - 54", "55 - 59", "50 - 54", "Unspecified", "75 - 79", "55 - 59", "45 - 49"]) @test isequal(default_test, GetPatientAgeGroup(test_ids, sqlite_conn; minuend=default_minuend, age_groupings=default_age_grouping)) @test isequal(minuend_now_test, GetPatientAgeGroup(test_ids, sqlite_conn; minuend=minuend_now, age_groupings=test_age_grouping_2)) @@ -341,7 +341,7 @@ end push!(age_groups, default_age_grouping_values[idx]) break elseif ismissing(grouping) - push!(age_groups, missing) + push!(age_groups, "Unspecified") end end end @@ -526,6 +526,40 @@ end @test test_ids == new[1:1] end +@testset "GetDrugExposureEndDate" begin + + test_drug_exposure_ids = [1.0, 2.0, 3.0, 4.0, 5.0] + + test_drug_exposure_end_date_ids = [-364953600, 31449600, -532483200, -80006400, 1330387200] + + res = sort(GetDrugExposureEndDate(test_drug_exposure_ids, sqlite_conn)) + test_df1 = DataFrame(drug_exposure_id = test_drug_exposure_ids, drug_exposure_end_date = res.drug_exposure_end_date[1:5]) + + new = GetDrugExposureEndDate(test_df1[:,"drug_exposure_id"], sqlite_conn) + + @test test_drug_exposure_end_date_ids == res.drug_exposure_end_date[1:5] + @test new.drug_exposure_end_date[1:5] == test_df1.drug_exposure_end_date[1:5] + @test isa(GetDrugExposureEndDate(test_drug_exposure_ids, sqlite_conn), DataFrame) + +end + +@testset "GetDrugExposureStartDate" begin + + test_drug_exposure_ids = [1.0, 2.0, 3.0, 4.0, 5.0] + + 
test_drug_exposure_start_date_ids = [-3.727296e8, 2.90304e7, -5.333472e8, -8.18208e7, 1.3291776e9] + + res = sort(GetDrugExposureStartDate(test_drug_exposure_ids, sqlite_conn)) + test_df1 = DataFrame(drug_exposure_id = test_drug_exposure_ids, drug_exposure_start_date = res.drug_exposure_start_date[1:5]) + + new = GetDrugExposureStartDate(test_df1[:,"drug_exposure_id"], sqlite_conn) + + @test test_drug_exposure_start_date_ids == res.drug_exposure_start_date[1:5] + @test new.drug_exposure_start_date[1:5] == test_df1.drug_exposure_start_date[1:5] + @test isa(GetDrugExposureStartDate(test_drug_exposure_ids, sqlite_conn), DataFrame) + +end + @testset "GetVisitProcedure Tests" begin test_visit_occurrence_ids = [22951.0, 23670.0, 26205.0, 26759.0, 27401.0, 28537.0, 29330.0, 30237.0, 31282.0, 32616.0]