From 2f662e70d55a1fe9a5fbc7c5cdab728b63fb04f1 Mon Sep 17 00:00:00 2001 From: junder873 Date: Tue, 4 Jan 2022 18:13:05 -0800 Subject: [PATCH] fix merge when one column is missing --- Project.toml | 2 +- src/mergeFunctions.jl | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index a35e0d4..971882e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "WRDSMerger" uuid = "59d27aa3-834e-4232-9046-52ef43e86786" authors = ["junder873 "] -version = "0.3.1" +version = "0.3.2" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/mergeFunctions.jl b/src/mergeFunctions.jl index 25ca048..af629f2 100644 --- a/src/mergeFunctions.jl +++ b/src/mergeFunctions.jl @@ -225,9 +225,10 @@ end function adjust_date_cols(df::DataFrame, table::LinkTable, date_min::Date, date_max::Date) if ismissing(table.date_col_max) && !ismissing(table.date_col_min) + col = Dict(table.type_translations...)[table.id_cols[1]] |> string df[!, table.date_col_min] = coalesce.(df[:, table.date_col_min], date_min) - sort!(df, [table.id_cols[1], table.date_col_min]) - gdf = groupby(df, [table.id_cols[1]]) + sort!(df, [col, table.date_col_min]) + gdf = groupby(df, [col]) df = transform(gdf, table.date_col_min => lead => "date_max") df[!, "date_max"] = coalesce.(df[:, "date_max"] .- Day(1), date_max)# I subtract a day since use <= later table.date_col_max = "date_max"